Hacker Newsnew | past | comments | ask | show | jobs | submitlogin
Text Processing Practice Expt: 27 SERP Types to SQLite (yy084)
3 points by 1vuio0pswjnm7 on Jan 19, 2024 | hide | past | favorite | 12 comments

     /*
     make sqlite from s.e.r.p.
     Scanner that reads saved search engine result pages and writes SQL
     INSERT statements for table t1.
     */
    
     /* hand-written prototype; no header is included for it in this file */
     int fileno (FILE *);
     /* not referenced by the code visible in this file */
     FILE *f;
     /* "jmp STATE;" switches the scanner start condition.  The expansion is
        the same assignment flex uses for its BEGIN macro, so "jmp 0" goes
        back to the initial state. */
     #define jmp (yy_start) = 1 + 2 *
     /* numeric site id parsed from the mode marker line */
     int mode;
     /* scratch counters and flags shared by every rule; all of them are
        cleared when an HTTP 200 status line is matched */
     int v,w,x,y,z;
     int utf8,tag;
     /* byte counts for the "date" and "query" environment values and for
        the url[] and desc[] buffers below */
     int datelen,querylen,urllen,desclen;
     /* holding buffers; rules stop copying into them at 255 bytes and track
        the used length separately instead of NUL-terminating */
     char desc[255];
     char url[255];
     /* current argument while main() prints the query words */
     char *a;
     /* more hand-written prototypes for functions used below */
     int setenv (const char *, const char *, int);
     int printf(const char *__restrict, ...);
     int dprintf(int, const char *__restrict, ...);
     /* NOTE(review): this differs from the <time.h> prototype
        time_t time(time_t *); confirm it is safe on the target ABI */
     unsigned int time(unsigned int *tloc);
     /*
     Write at most n bytes of s to yyout as the inside of a SQL string
     literal: every single quote (39) is doubled.  A null s writes nothing.
     */
     static void CDp8iULPuCquote(const char *s,int n)
     {
     int i;
     if(!s)return;
     for(i=0;i<n&&s[i];i++){if(s[i]==39)putc(39,yyout);putc(s[i],yyout);}
     }
     /*
     Write the fixed head of one row to yyout:
       INSERT or IGNORE INTO t1 VALUES(NNN,'<query>','<date>','
     xyz is the site id, printed as three digits.  The query and date come
     from the "query" and "date" environment variables, limited to querylen
     and datelen bytes.  The caller appends the url, the "','" separator,
     the description and the closing "');".  Always returns 0.
     */
     int CDp8iULPuCinsert(int xyz) 
     {
     fwrite("INSERT or IGNORE INTO t1 VALUES(",1,32,yyout);
     fprintf(yyout,"%03d,'",xyz);
     /* the query is user text: a quote in it must not end the literal */
     CDp8iULPuCquote(getenv("query"),querylen);
     fwrite("','",1,3,yyout);
     CDp8iULPuCquote(getenv("date"),datelen);
     fwrite("','",1,3,yyout);
     return 0;
     }
    %s M1 M2 M3 M4 M5 M6 M7 M9 
    %s M10 M11 M12 M13 M14 M15 M16 
    %s M17 M18 M19 M20 M21 M22 M23  
    %s M24 M25 M26 M27
    %s M2_DESC M3_DESC M5_DESC M11_URL 
    %s M11_DESC1 M11_DESC2 M13_DESC M24_DESC 
    %s M25_DESC M27_DESC
     /* Mn above is the rule set for site id n, selected by the mode marker
        rule; the *_DESC and *_URL states are entered while copying one
        field.  There is no M8, and no rule or mode case visible in this
        file uses M9 or M12. */
    %option nounput noinput noyywrap 
    %%
    
    TWVGHkppk6Q6cODqP7Kqdate:\"[^\"]+ {
     /* Shift the value past the 26-byte marker prefix to the front of
        yytext, skipping NUL bytes, and terminate it there so that setenv()
        stores exactly the value and nothing left over from the match. */
     y=0;for(x=26;x<yyleng;x++)if(yytext[x])yytext[y++]=yytext[x];
     yytext[y]=0;
     setenv("date",yytext,1);
     datelen=y;
     }
    TWVGHkppk6Q6cODqP7Kqquery:\"[^\"]+ {
     /* Same as above; the query marker prefix is 27 bytes long. */
     y=0;for(x=27;x<yyleng;x++)if(yytext[x])yytext[y++]=yytext[x];
     yytext[y]=0;
     setenv("query",yytext,1);
     querylen=y;
     }
    TWVGHkppk6Q6cODqP7Kqmode:\"[^\"]+ {
     /* Shift the value past the 26-byte marker prefix to the front of
        yytext and terminate it there, so atoi() parses the value only. */
     y=0;for(x=26;x<yyleng;x++)if(yytext[x])yytext[y++]=yytext[x];
     yytext[y]=0;
     mode=atoi(yytext);
     /* Pick the rule set for this site.  An id without a case leaves the
        current start condition as it is. */
     switch(mode){ 
     case 1:  jmp M1;break;  /* duckduckgo     */
     case 2:  jmp M2;break;  /* google         */
     case 3:  jmp M3;break;  /* qwant          */
     case 4:  jmp M4;break;  /* startpage      */
     case 5:  jmp M5;break;  /* github         */
     case 6:  jmp M6;break;  /* google news    */
     case 7:  jmp M7;break;  /* mojeek         */
     case 10: jmp M10;break; /* f-droid        */
     case 11: jmp M11;break; /* bing           */
     case 13: jmp M13;break; /* yandex         */
     case 14: jmp M14;break; /* tailsx         */
     case 15: jmp M15;break; /* google scholar */
     case 16: jmp M16;break; /* ecosia         */
     case 17: jmp M17;break; /* wiby           */
     case 18: jmp M18;break; /* marginalia     */
     case 19: jmp M19;break; /* yahoo          */
     case 20: jmp M20;break; /* youtube        */
     case 21: jmp M21;break; /* wikipedia      */
     case 22: jmp M22;break; /* pubmed         */
     case 23: jmp M23;break; /* brave          */
     case 24: jmp M24;break; /* mwmbl          */
     case 25: jmp M25;break; /* crossref       */
     case 26: jmp M26;break; /* aol            */
     case 27: jmp M27;break; /* sec            */
     default: break;
     }
     }
    ^HTTP\/1\.[01][ ]200[ ]O[Kk]\r { /* new response: clear per-page state */
     utf8=0;tag=0;urllen=0;desclen=0;v=0;w=0;x=0;y=0;z=0;}
       /* INSERT SERPS HERE */
    
    .|\n 
    %%
     /*
     With two or more arguments (MODE WORD...), print the three marker
     lines (mode, query, date) that the scanner later reads back, then
     exit.  Otherwise wrap the scanner output for stdin in a transaction
     that creates table t1 if needed.
     */
     int main(int argc,char *argv[])
     {
     int i;
     if(argc>2)
     {
     printf("TWVGHkppk6Q6cODqP7Kqmode:\"%s\"\n",argv[1]); 
     fwrite("TWVGHkppk6Q6cODqP7Kqquery:\"",1,27,stdout);
     /* query words are argv[2] onward, joined by single spaces */
     for(i=2;i<argc;i++)
     {
     printf("%s",argv[i]);
     if(i<argc-1)putc(32,stdout);
     }
     fwrite("\"\n",1,2,stdout);
     printf("TWVGHkppk6Q6cODqP7Kqdate:\"%u\"\n",time((unsigned int *)0));
     exit(0);
     }
     fwrite("PRAGMA foreign_keys=OFF;\n",1,25,stdout);
     fwrite("BEGIN TRANSACTION;\n",1,19,stdout);
     fwrite("CREATE TABLE IF NOT EXISTS t1(site INTEGER,query TEXT,gmt TEXT,url TEXT PRIMARY KEY,desc TEXT);\n",1,96,stdout);
     yylex();
     fwrite("COMMIT;\n",1,8,stdout);
     exit(0);
     }


     /*```````````duckduckgo```````````*/
     /* The vqd token goes to file descriptor 3.  A result link starts a row
        and writes the url; the link text that follows closes the row.
        While z is set (between "Zero-click info: " and the web results
        marker) links are skipped, and so is their link text, which would
        otherwise be written without a row head.  Single quotes are doubled
        in both fields. */
    <M1>vqd=[0-9-]{41} dprintf(3,"%s\n",yytext);
    <M1>"Zero-click info: " z++;
    <M1>"<!-- Web results are present -->" z=0;
    <M1>[ ]{8}"<a rel=\"nofollow\" href=\""[^\n\r"]+ if(!z){
     CDp8iULPuCinsert(1);
     for(x=32;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M1>"class='result-link'><span class=\"result__type\">"[^<]+"</span>"[^<]+ if(!z){
     /* NOTE(review): 57 = 47-byte prefix + 7-byte "</span>" + 3, i.e. it
        assumes a three-character type label; confirm */
     for(x=57;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    <M1>"class='result-link'>"[^<]+ if(!z){
     for(x=20;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    
    
     /*```````````ecosia```````````*/
     /* The article label starts a row and is held in desc[] (at most 255
        bytes); the result link then supplies the url and closes the row
        with the held description.  Single quotes are doubled in both. */
    <M16>"<article aria-label=\""[^\"]+ {
     CDp8iULPuCinsert(16);
     y=0;for(x=21;x<yyleng;x++)if(y<255){desc[y]=yytext[x];y++;};desclen=y;
     }
    <M16>"<a data-test-id=\"result-link\" tabindex=\"-1\" href=\""[^\"]+ {
     for(x=50;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     for(x=0;x<desclen;x++){if(desc[x]==39)putc(39,yyout);putc(desc[x],yyout);};
     /* test the stored length, not the scratch counter y */
     if(desclen==255)fwrite(" [TRUNCATED]",1,12,yyout);
     fwrite("');\n",1,4,yyout);
     }
    
To test:

    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [OFFSET]
    # Without SITE, the list of supported sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;1) # duckduckgo
    shift
    yy084 1 "$x s=${1-0}"
    curl -si40A "" -d "q=$x&p=&s=${1-0}&dc=$(($1+1))&api=%2Fd.js&o=json&kl=wt-wt" https://lite.duckduckgo.com/lite/
    ;;16) # ecosia
    shift
    yy084 16 "$x p=${1-1}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    # -A takes the bare agent string; curl adds the header name itself
    curl -si04 -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Safari/605.1.15" \
    -H "Accept: */*" \
    "https://www.ecosia.org/search?method=index&q=$x&p=${1-1}"
    esac


    echo research|1.sh 1 > 1.serp
    echo research|1.sh 16 > 16.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


Correction (to the usage line at the top of the test script):

    test $1||exec sed -n '/^ *;;/p' "${0##*/}" 
should be

    test $1||exec sed -n '/^ *;;/p' "$0"


.

    #!/bin/sh
    # Rebuild the scanner from the code posted in this thread: dump each
    # comment page with links, cut the posted code out of it by line
    # number, assemble 084.l, run flex and cc, and write 84.sh.
    # The line numbers in every sed command below are positions in the
    # dumped pages (or in 84.txt built from them), so they only hold for
    # those exact dumps.

    # ins FILE N: edit FILE in place, reading stdin in after line N.
    # NOTE(review): the purpose of the extra "-e1N" is not evident here.
    ins(){ sed -i -e $2r/dev/stdin -e1N $1 ;}

    # 084.l
    links -dump https://news.ycombinator.com/item?id=39051760 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,5d;113,$d' > 084.l
    
    # aol, bing, brave
    links -dump https://news.ycombinator.com/item?id=39051797 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,6d;97,$d'  > 84.txt
    
    # duckduckgo, ecosia
    links -dump https://news.ycombinator.com/item?id=39051851 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,4d;59,$d' >> 84.txt
    
    # crossref
    links -dump https://news.ycombinator.com/item?id=39051834 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,5d;111,$d' >> 84.txt
    
    # f-droid, github, google
    links -dump https://news.ycombinator.com/item?id=39051867 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,6d;87,$d' >> 84.txt
    
    # google news, marginalia, mojeek
    links -dump https://news.ycombinator.com/item?id=39051893 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,6d;95,$d' >> 84.txt
    
    # mwmbl, pubmed, qwant
    links -dump https://news.ycombinator.com/item?id=39051911 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,5d;100,$d' >> 84.txt
    
    # scholar, sec, startpage
    links -dump https://news.ycombinator.com/item?id=39051942 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,5d;82,$d' >> 84.txt
    
    # tailsx, wiby, wikipedia
    links -dump https://news.ycombinator.com/item?id=39051963 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,6d;78,$d' >> 84.txt
    
    # yahoo, yandex
    links -dump https://news.ycombinator.com/item?id=39051996 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,5d;78,$d' >> 84.txt
    
    # youtube
    links -dump https://news.ycombinator.com/item?id=39052006 \
    |sed 's/ *$//;s/[ ]\{7\}//;1,5d;88,$d' >> 84.txt
    
    # delete the listed line ranges from 84.txt and insert what is left
    # into 084.l after its line 85
    # (presumably the deleted ranges are the shell parts -- verify)
    sed '785,810d;708,731d;631,659d;558,588d;485,512d;394,417d;
         305,330d;207,250d;124,144d;67,91d;' 84.txt |ins 084.l 85
    
    flex -8Cem 084.l
    if test $(wc -l < lex.yy.c) -gt 32767;then # exceeds c89 limit, remove pedantic
    cc -O3 -std=c89 -W -Wall -I$HOME -pipe lex.yy.c -static -o yy084
    else
    cc -O3 -std=c89 -W -Wall -pedantic -I$HOME -pipe lex.yy.c -static -o yy084
    fi
    
    strip -s yy084
    
    # write 84.sh from 84.txt with a second set of line ranges deleted
    # (presumably the scanner rules, leaving the test scripts -- verify)
    sed '732,793d;714,715d;660,712d;589,637d;513,563d;418,491d;
         330,400d;250,312d;145,213d;92,130d;2,70d' 84.txt > 84.sh


     /*```````````youtube search html```````````*/
     /* A videoId starts a row and writes the watch url.  The title and the
        channel path are held in desc[] and url[] (at most 255 bytes each).
        The first browseId after a videoId closes the row with
        "<browseId> <channel path> <title>"; z blocks later browseIds until
        the next videoId. */
    <M20>"\"originalUrl\":\"https://www.youtube.com/results?search_query" w=0;
    <M20>"ideoRenderer\":{\"videoId\":\""[^\"]+ {
     z=0;
     CDp8iULPuCinsert(20);
     fwrite("https://www.youtube.com/watch?v=",1,32,yyout); 
     /* NOTE(review): the literal prefix of this pattern is 26 bytes, so
        starting at 27 skips the first matched id byte; confirm intended */
     for(x=27;x<yyleng;x++)putc(yytext[x],yyout); 
     fwrite("','",1,3,yyout);
     }
    <M20>"\"title\":{\"runs\":[{\"text\":\""[^\"]+ {
     y=0;for(x=26;x<yyleng;x++)if(y<255){desc[y]=yytext[x];y++;};desclen=y;
     }
    <M20>"webCommandMetadata\":{\"url\":\"/@"[^\"]+ {
     /* offset 29 keeps the "@" and drops the "/" before it */
     y=0;for(x=29;x<yyleng;x++)if(y<255){url[y]=yytext[x];y++;};urllen=y;
     }
    <M20>\"browseId\":\"[^\"]+ if(!z){
     z++;
     for(x=12;x<yyleng;x++)putc(yytext[x],yyout);
     putc(32,yyout);
     fwrite(url,1,urllen,yyout);
     putc(32,yyout);
     /* only the title has its single quotes doubled */
     for(x=0;x<desclen;x++){if(desc[x]==39)putc(39,yyout);putc(desc[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    
     /*```````````youtube search json```````````*/
     /*   for timestamps from json, use yy078   */
     /* These patterns span several lines of indented JSON.  The headline
        text is held in desc[]; browseId and canonicalBaseUrl are stored in
        the "cid" and "cburl" environment variables with their lengths in z
        and w; the videoId rule then writes one complete row.
        NOTE(review): the fixed offsets 127, 91 and 78 sit behind
        variable-width [ ]{20,30} runs, so they hold for one specific
        indentation only; confirm against real input. */
    <M20>[ ]{20,30}\"headline\":[ ]\{\n[ ]{20,30}\"runs\":[ ]\[\n[ ]{20,30}\{\n[ ]{20,30}\"text\":[ ]\"[^\"]+ {
     z=0;
     y=0;for(x=127;x<yyleng;x++)if(y<255){desc[y]=yytext[x];y++;};desclen=y;
     }
    <M20>[ ]{20,30}\"browseEndpoint\":[ ]\{\n[ ]{20,30}\"browseId\":[ ]\"[^\"]+ {
     /* the value is shifted to the front of yytext but not terminated at
        its new end, so the variable carries trailing bytes; readers limit
        themselves to z bytes */
     y=0;for(x=91;x<yyleng;x++){if(yytext[x])yytext[y]=yytext[x];
     if(yytext[yyleng])yytext[yyleng]=0;y++;};z=y;
     setenv("cid",yytext,1);
     }
    <M20>[ ]{29}\"canonicalBaseUrl\":[ ]\"[^\"]+ {
     y=0;for(x=50;x<yyleng;x++){if(yytext[x])yytext[y]=yytext[x];
     if(yytext[yyleng])yytext[yyleng]=0;y++;};w=y;
     setenv("cburl",yytext,1);
     }
    <M20>[ ]{20,30}\"watchEndpoint\":[ ]\{\n[ ]{20,30}\"videoId\":[ ]\"[^\"]+ {
     CDp8iULPuCinsert(20);
     fwrite("https://www.youtube.com/watch?v=",1,32,yyout); 
     for(x=78;x<yyleng;x++)putc(yytext[x],yyout); 
     fwrite("','",1,3,yyout);
     /* NOTE(review): getenv() is used unchecked; this relies on the two
        rules above having matched earlier in the input */
     fwrite(getenv("cid"),1,z,yyout);
     putc(32,yyout);
     fwrite(getenv("cburl"),1,w,yyout);
     putc(32,yyout);
     for(x=0;x<desclen;x++){if(desc[x]==39)putc(39,yyout);putc(desc[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    
   
To test:

    #!/bin/sh
    # Fetch youtube search results and write them to stdout after the yy084
    # marker header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh 20 [DEPTH]
    # DEPTH is the number of follow-up requests made after the first page.
    # Without a site argument, the list of sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;20) # youtube
    shift
    yy084 20 "$x depth=${1-0}"
    # sed w... copies the response to stdout and keeps it in a file
    echo ="$x"|yy076|nc -vvn 127.7 80|sed w020.htm
    test "$1"||exit
    n=1;while true;do
    test $n -le ${1-0}||break
    # each request is built from the previous response
    if test $n -eq 1 ;then x=020.htm; fi
    if test $n -gt 1 ;then x=020.json.$((n-1)); fi
    yy082 t < $x|sed 1q|yy076|nc -vvn 127.7 80|sed w020.json.$n
    n=$((n+1))
    done
    rm 020.htm
    if test $1 -gt 0;then rm 020.json.[1-9];fi
    if test $1 -gt 9;then rm 020.json.[1-9][0-9];fi
    esac


    echo research|1.sh 20 > 20.serp
    yy084 < 20.serp|sqlite3 test.db


     /*```````````yahoo/bingj```````````*/
     /* The result link is held in url[] and the following aria-label in
        desc[] (at most 255 bytes each).  The fc-obsidian span starts the
        row and writes the url.  The row is closed either by the cache link
        (w set) or, at </li>, by the held description.  v marks "inside a
        result".  Single quotes are doubled in every field. */
    <M19>"<a style=\"line-height:26px\" class=\" d-ib fz-20 lh-26 td-hu tc va-bot mxw-100p\" href=\""[^\"]+ {
     w=0;v++;
     y=0;for(x=85;x<yyleng;x++)if(y<255){url[y]=yytext[x];y++;};urllen=y;
     }
    <M19>aria-label=\"[^\"]+ if(v){
     y=0;for(x=12;x<yyleng;x++)if(y<255){desc[y]=yytext[x];y++;};desclen=y;
     }
    <M19>\<span[ ]class=.{33}fc-obsidian {
     v++;
     CDp8iULPuCinsert(19);
     for(x=0;x<urllen;x++){if(url[x]==39)putc(39,yyout);putc(url[x],yyout);};
     if(urllen==255)fwrite(" #[TRUNCATED]",1,13,yyout);
     fwrite("','",1,3,yyout);
     }
    <M19>"ptb-1u td-n td-n-h\" href=\""[^\"]+ {
     w++;
     fwrite("https://cc.bingj.com/cache.aspx?",1,32,yyout);
     /* z counts "&" (38) seen so far: copy what follows the first one,
        without the "&" itself, and everything from the fourth one on */
     z=0;for(x=26;x<yyleng;x++)
     {
     if(yytext[x]==38)z++;
     if((z==1&&yytext[x]!=38)||z==4)
     {
     if(yytext[x]==39)putc(39,yyout);
     putc(yytext[x],yyout);
     }
     } 
     fwrite("');\n",1,4,yyout);
     }
    <M19>"</li>" if(v){
     if(!w){
     for(x=0;x<desclen;x++){if(desc[x]==39)putc(39,yyout);putc(desc[x],yyout);};
     if(desclen==255)fwrite(" [TRUNCATED]",1,12,yyout);
     fwrite("');\n",1,4,yyout); 
     }
     v=0;
     }
    
    
      /*`````````yandex``````````*/
      /* A result link starts a row and writes the url.  The text span that
         follows is copied with tags removed and closes the row.  Single
         quotes are doubled in both fields. */
    <M13>accessKey=.{3}[ ]tabindex=.{3}[ ]href=\"[^\"]+|.{8}link\"[ ]tabindex=.{3}[ ]href=\"[^\"]+ {
     CDp8iULPuCinsert(13);
     for(x=33;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M13>OrganicTextContentSpan\"> jmp M13_DESC;
    <M13_DESC><\/span> fwrite("');\n",1,4,yyout);jmp M13;
    <M13_DESC><[^>]+>
    <M13_DESC>. {if(yytext[0]==39)putc(39,yyout);putc(yytext[0],yyout);}
    
    
To test:

    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [PAGE]
    # Set COUNTRYCODE below to use a country-specific yahoo host.
    # Without SITE, the list of supported sites in this script is printed.

    COUNTRYCODE=
    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;13) # yandex
    shift
    yy084 13 "$x p=${1-1}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://yandex.com/search/?text=$x&p=${1-1}&family=0"
    ;;19) # yahoo
    shift
    yy084 19 "$x b=${1-1}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    # test the variable that is set above (it was misspelled here)
    if test "$COUNTRYCODE"
    then curl -si04A "" "https://$COUNTRYCODE.search.yahoo.com/search?p=$x&n=100&b=${1-1}"
    else curl -si04A "" "https://search.yahoo.com/search?p=$x&n=100&b=${1-1}"
    fi
    esac


    echo research|1.sh 13 > 13.serp
    echo research|1.sh 19 > 19.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


     /*`````````tailsx```````````*/
     /* Each result link first closes the previous row (once z shows that a
        title has been written), then starts a new row and writes the url.
        The <h3> text is written as the quoted description.  The prev-next
        block closes the last row.  Single quotes are doubled. */
     /* Sublinks are matched here only to be discarded.  The newline and
        the [ ]+ run have to stand outside the quoted strings: inside
        quotes "[ ]+" would be matched as those four literal characters. */
    <M14>"<div class=\"result_sublink\">"\n[ ]+"<a id=\"link\" href=\""[^\"]+ ;
    <M14><a[ ]{1,2}href=\"h[^\"]+ {
     if(z)fwrite(");\n",1,3,yyout);
     CDp8iULPuCinsert(14);
     /* the url starts at 9 or 10 depending on one or two spaces after <a */
     y=10;if(yytext[9]=='h')y=9;
     for(x=y;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);}; 
     fwrite("',",1,2,yyout);
     }
    <M14>"<h3>"[^<]+ {
     putc(39,yyout); 
     for(x=4;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);}; 
     putc(39,yyout); 
     z++;
     }
    <M14>"<div class=\"prev-next\">" fwrite(");\n",1,3,yyout);
    
     /*```````````wiby.me```````````*/
     /* One match holds href, closing quote, ">" and the link text.  The
        closing quote (34) becomes the "','" field separator, ">" (62) is
        dropped, everything else is copied.  Single quotes are doubled. */
    <M17>"<a class=\"tlink\" href=\""[^<]+ {
     CDp8iULPuCinsert(17);
     for(x=23;x<yyleng;x++)
     {
     if(yytext[x]==34)fwrite("','",1,3,yyout);
     if(yytext[x]!=34&&yytext[x]!=62)
     {
     if(yytext[x]==39)putc(39,yyout);
     putc(yytext[x],yyout);
     }
     }
     fwrite("');\n",1,4,yyout);
     }
    
     /*```````````wikipedia```````````*/
     /* A result heading starts a row and writes the site prefix followed
        by the href.  The next title attribute closes the row; z limits
        that to one title per heading.  Single quotes are doubled. */
    <M21>"<div class=\"mw-search-result-heading\"><a href=\""[^\"]+ {
     z++;
     CDp8iULPuCinsert(21);
     /* the prefix is 24 bytes long; writing 23 dropped the "g" of ".org" */
     fwrite("https://en.wikipedia.org",1,24,yyout);
     for(x=47;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M21>[ ]title=\"[^\"]+ if(z){
     z=0;
     for(x=8;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }

To test:

    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [PAGE-OR-OFFSET]
    # Without SITE, the list of supported sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;14) # tailsx
    shift
    yy084 14 "$x p=${1-0}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://tailsx.com/search?q=$x&t=text&p=${1-0}"
    ;;17) # wiby
    shift
    yy084 17 "$x p=${1-0}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://wiby.me/?q=$x&p=${1-0}"
    ;;21) # wikipedia
    shift
    yy084 21 "$x offset=${1-0}";
    # squeeze doubled "+" like the other entries do
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    # namespace selection and paging, split only to keep the lines short
    W="&ns0=1&ns1=1&ns2=1&ns3=1&ns4=1&ns5=1&ns6=1&ns7=1&ns8=1&ns9=1"
    X="&ns10=1&ns11=1&ns12=1&ns13=1&ns14=1&ns15=1&ns100=1&ns101=1"
    Y="&ns118=1&ns119=1&ns710=1&ns711=1&ns828=1&ns829=1&ns2300=1"
    Z="&ns2301=1&ns2302=1&ns2303=1&fulltext=1&offset=${1-0}"
    curl -si04A "" "https://en.wikipedia.org/w/index.php?search=$x$W$X$Y$Z"
    esac


    echo research|1.sh 14 > 14.serp
    echo research|1.sh 17 > 17.serp
    echo research|1.sh 21 > 21.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


     /*```````````google scholar```````````*/
     /* The result link (an <a> with a 12-character id) starts a row and
        writes the url; the link text after the data-clk-atid attribute
        closes it.  Single quotes are doubled in both fields. */
    <M15>\<a[ ]id=\"[A-Za-z0-9]{12}\"[ ]href=\"[^\"]+ {
     CDp8iULPuCinsert(15);
     for(x=27;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M15>data-clk-atid=\"[A-Za-z0-9]{12}\"\>[^<]+ {
     for(x=29;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    
    
     /*````````sec.gov`````````*/
     /* Thumbnail blocks are matched and discarded.  A result href starts a
        row and writes the url; the link content is then copied with tags
        removed until </a>, which closes the row.  Single quotes are
        doubled in both fields. */
    <M27>"<div class='thumbnail'>"[^}]+ 
    <M27>"&quot;}\" href=\""[^\"]+ {
     CDp8iULPuCinsert(27);
     for(x=15;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     jmp M27_DESC;
     }
    <M27_DESC>"</a>" fwrite("');\n",1,4,yyout);jmp M27;
    <M27_DESC>\">
    <M27_DESC><[^>]+>
    <M27_DESC>. {if(yytext[0]==39)putc(39,yyout);putc(yytext[0],yyout);}
    
    
     /*```````````startpage```````````*/
     /* The results are read from JSON whose quotes are backslash-escaped;
        \134 in the patterns is the backslash.  The url starts a row, the
        title closes it with markup removed, and the end of the array
        returns to the initial state.  Single quotes are doubled. */
    <M4>\{\134\"url\134\":[ ]\134\"[^\134]+ {
     CDp8iULPuCinsert(4);
     for(x=12;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M4>,[ ]\134\"title\134\":[ ]\134\"[^\134]+ {
     for(x=15;x<yyleng;x++)
     {   
     /* tag is set from "<" (60) up to ">" (62); nothing inside is copied */
     if(yytext[x]==60)tag++;
     if(yytext[x]==62)tag=0;
     if(!tag)if(yytext[x]!=62){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     }
     fwrite("');\n",1,4,yyout);
     }
    <M4>\}\]\" jmp 0;



    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [PAGE-OR-START]
    # Without SITE, the list of supported sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;4) # startpage
    shift
    yy084 4 "$x page=${1-1}"
    curl -si40 -A "" -d "query=$x&abp=-1&t=&lui=english&sc=$sc&cat=web&page=${1-1}" https://www.startpage.com/sp/search
    ;;15) # google scholar
    shift
    query="$x";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://scholar.google.com/scholar?start=${1-0}&as_q=$x&as_sdt=0,5" > 015.htm
    # the marker header records the query as typed, saved above
    yy084 15 "$query start=${1-0}"
    sed -n 's>.*\/scholar?cluster=>url=https://scholar.google.com/scholar?cluster=>;s/&amp;.*//p' 015.htm \
    |curl -si04A "" -K/dev/stdin
    # curl is unbearable
    #export Connection=keep-alive;
    #host=scholar.google.com yy044 r < 015.htm \
    #|sed -n '/scholar?cluster=/{s/&amp;/\&/g;p;}' \
    #|yy025|nc -vvn 127.7 80|yy045
    ;;27) # sec
    shift
    yy084 27 "$x page=${1-1}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    # -A needs its own argument; without it the url was taken as the agent
    curl -si40A "" "https://secsearch.sec.gov/search?affiliate=secsearch&sort_by=&query=$x&page=${1-1}&commit=Search" 
    esac


    echo research|1.sh 4 > 4.serp
    echo research|1.sh 15 > 15.serp
    echo research|1.sh 27 > 27.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


     /*`````````mwmbl`````````*/
     /* The result link starts a row and writes the url.  The title
        paragraph is copied without its <strong> tags and closes the row.
        Single quotes are doubled in both fields. */
    <M24>"<div class=\"result-link\">"\n[ ]{6}"<a href=\""[^\"]+  {
     CDp8iULPuCinsert(24);
     for(x=41;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M24>"<p class='title'>" jmp M24_DESC;
    <M24_DESC>"</p>" fwrite("');\n",1,4,yyout);jmp M24;
    <M24_DESC>"<strong>"|"</strong>"
    <M24_DESC>. {if(yytext[0]==39)putc(39,yyout);putc(yytext[0],yyout);}
    
    
     /*```````````pubmed```````````*/
     /* An article id starts a row and writes the article url.  The next
        text line that follows a line of 14 spaces is taken as the title
        and closes the row; z limits that to one title per id. */
    <M22>data-article-id=\"[^\"]+ {
     z++;
     CDp8iULPuCinsert(22);
     fwrite("https://pubmed.ncbi.nlm.nih.gov/",1,32,yyout);
     /* NOTE(review): the literal prefix of this pattern is 17 bytes, so
        starting at 18 skips the first matched id byte; confirm intended.
        47 is "/", appended after the id. */
     for(x=18;x<yyleng;x++)putc(yytext[x],yyout);putc(47,yyout);
     fwrite("','",1,3,yyout);
     }
    <M22>\n[ ]{14}\n[^\n]+ if(z){
     /* compact the text to the front of yytext, leaving out anything
        between "<" (60) and ">" (62) */
     y=0;for(x=32;x<yyleng;x++)
     {
     if(yytext[x]==60)tag++;
     if(yytext[x]==62)tag=0;
     if(!tag)if(yytext[x]!=62){yytext[y]=yytext[x];y++;};desclen=y;
     }
     /* write it with single quotes doubled */
     for(x=0;x<desclen;x++)
     {
     if(yytext[x]==39)putc(39,yyout);
     putc(yytext[x],yyout);
     }
     fwrite("');\n",1,4,yyout);
     z=0;
     } 
    
    
     /*```````````qwant json``````````*/
     /* The title is held in desc[] (at most 255 bytes) until the url
        arrives, which writes the whole row.  Both rules are limited to
        M3: without a start condition the title rule was active for every
        site and overwrote desc[] there.  Single quotes are doubled. */
    <M3>\"title\":\"[^\"]+ {
     y=0;for(x=9;x<yyleng;x++)if(y<255){desc[y]=yytext[x];y++;};
     desclen=y;
     }
    <M3>\"url\":\"[^\"]+ {
     CDp8iULPuCinsert(3);
     for(x=7;x<yyleng;x++)
     {
     if(yytext[x]==39)putc(39,yyout);
     putc(yytext[x],yyout);
     }
     fwrite("','",1,3,yyout);
     for(x=0;x<desclen;x++)
     {
     if(desc[x]==39)putc(39,yyout);
     putc(desc[x],yyout);
     }
     if(desclen==255)fwrite(" [TRUNCATED]",1,12,yyout);
     fwrite("');\n",1,4,yyout);
     }
    
     /*```````````qwant html```````````*/
     /* NOTE(review): the span rule starts a row but writes none of the
        matched text, and no rule here writes the "','" between url and
        description, so the row ends up with one field fewer than the
        table has; confirm the intended output. */
    <M3>\<span[ ]class=\"url[ ]partner\"\>[^<]+ {
     CDp8iULPuCinsert(3);
     }
    <M3>\<a[ ]href=\"\/redirect\/[^>]+ jmp M3_DESC;
    <M3_DESC>\< fwrite("');\n",1,4,yyout); jmp M3; 
     /* the first two bytes and the last byte of the text run are left out */
    <M3_DESC>[^<]+ for(x=2;x<yyleng-1;x++)putc(yytext[x],stdout); 
    
    
    
    
    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [PAGE-OR-OFFSET]
    # Without SITE, the list of supported sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;3) # qwant
    shift
    yy084 3 "$x offset=${1-0}"
    # the query goes into the url, so spaces are turned into "+" first
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    curl --http1.1 -si4A "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.5 Mobile/15E148 Safari/604.1" \
    -H "Referer: https://www.qwant.com" \
    -H "Accept: application/json, text/plain, */*" \
    "https://api.qwant.com/v3/search/web?q=$x&count=10&locale=en_US&offset=${1-0}&device=desktop&tgp=2&safesearch=0&displayed=true"
    ;;22) # pubmed
    shift
    yy084 22 "$x page=${1-1}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    curl -si04A "" "https://pubmed.ncbi.nlm.nih.gov/?term=$x&sort=&page=${1-1}"
    ;;24) # mwmbl
    shift
    yy084 24 "$x";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    #curl -si04A "" "https://mwmbl.org/?q=$x"
    echo "https://mwmbl.org/?q=$x"|client 185.34.32.175 
    esac

    echo research|1.sh 3 > 3.serp
    echo research|1.sh 22 > 22.serp
    echo research|1.sh 24 > 24.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


     /* ``````````````google news```````````````` */
     /* The first rule picks the headline out of the matched text and holds
        it in desc[] with its length in w.  The second rule writes a whole
        row itself instead of calling CDp8iULPuCinsert(): the ten digits
        inside the brackets go into the date field and the quoted text
        after them into the url field. */
    <M6>\[13,\"[0-9A-Za-z]+\"\],\"[^,]+,null {
     w=0;y=0;z=0;
     /* z is a small state counter: 1 after "]" (93), 2 after the next ","
        (44), 3 after the next quote (34), 4 from the first byte that is
        not a quote; bytes are copied while it is 4 */
     for(x=0;x<yyleng-4;x++)
     { 
     if(yytext[x]==93)z++; 
     if(z==1)if(yytext[x]==44)z++; 
     if(z==2)if(yytext[x]==34)z++; 
     if(z==3)if(yytext[x]!=34)z++; 
     if(z==4)if(w<255){desc[w]=yytext[x];w++;}; 
     }
     }
    <M6>,\[[0-9]{10}\],null,\"[^\"]+ { 
     fwrite("INSERT or IGNORE INTO t1 VALUES(006,'",1,37,stdout);
     fwrite(getenv("query"),1,querylen,stdout); 
     fwrite("','",1,3,yyout);
     for(x=2;x<12;x++)putc(yytext[x],yyout);
     fwrite("','",1,3,yyout);
     for(x=20;x<yyleng;x++)putc(yytext[x],yyout);
     fwrite("','",1,3,yyout);
     /* the last two held bytes are left out; single quotes are doubled
        and backslashes (92) dropped */
     for(x=0;x<w-2;x++)
     {
     if(desc[x]==39)putc(39,yyout);   
     if(desc[x]!=92)putc(desc[x],stdout);    
     }
     if(w==255)fwrite(" [TRUNCATED]",1,12,yyout);
     fwrite("');\n",1,4,yyout);
     }
    
    
     /*```````````marginalia```````````*/
     /* One match holds href, closing quote, ">" and the link text.  The
        quote (34) becomes the "','" field separator; ">" (62) and newlines
        (10) are dropped; a space is kept only when the byte before it is
        not a space.  Single quotes are doubled. */
    <M18>class=\"title\"[ ]rel=\"nofollow[ ]external\"[ ]href=\"[^<]+ { 
     CDp8iULPuCinsert(18);
     for(x=44;x<yyleng;x++)
     {
     if(yytext[x]==34)fwrite("','",1,3,yyout); 
     if(yytext[x]==32)if(yytext[x-1]!=32)putc(yytext[x],yyout);
     if(yytext[x]!=32&&yytext[x]!=34&&yytext[x]!=62&&yytext[x]!=10)
     {
     if(yytext[x]==39)putc(39,yyout);
     putc(yytext[x],yyout);
     }
     }
     fwrite("');\n",1,4,yyout);
     }
    
    
     /*`````````mojeek`````````*/
     /* The title attribute of a result link starts a row and fills the
        first field; the link text in front of "</a></h2>" fills the second
        and closes the row.  Single quotes are doubled. */
    <M7>"<!--rs--><li><a title=\""[^\"]+ {
     CDp8iULPuCinsert(7);
     x=23;
     while(x<yyleng)
     {
     if(yytext[x]==39)putc(39,stdout);
     putc(yytext[x],yyout);
     x++;
     }
     fwrite("','",1,3,yyout);
     }
    <M7>\>[^<]+"</a></h2>" {
     /* leave out the leading ">" and the nine-byte "</a></h2>" */
     x=1;
     while(x<yyleng-9)
     {
     if(yytext[x]==39)putc(39,stdout);
     putc(yytext[x],yyout);
     x++;
     }
     fwrite("');\n",1,4,yyout);
     }
    
   
To test:

    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [OFFSET]
    # Without SITE, the list of supported sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;6) # google news
    shift
    yy084 6 "$x"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://news.google.com/search?q=$x&hl=en-US&gl=US&ceid=US:en"
    ;;7) # mojeek
    shift
    yy084 7 "$x s=${1-0}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "x" "https://www.mojeek.com/search?q=$x&t=40&s=${1-0}&date=1&size=1"
    ;;18) # marginalia
    shift
    yy084 18 "$x";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    curl -si40A "" "https://search.marginalia.nu/search?query=$x"
    esac

    echo research|1.sh 6 > 6.serp
    echo research|1.sh 7 > 7.serp
    echo research|1.sh 18 > 18.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


     /*`````````f-droid`````````*/
     /* The package link starts a row and writes the url; the package
        summary closes it.  Single quotes are doubled in both fields. */
    <M10>"<a class=\"package-header\" href=\""[^\"]+ {
     CDp8iULPuCinsert(10);
     for(x=32;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("','",1,3,yyout);
     }
    <M10>"<span class=\"package-summary\">"[^<]+ {
     for(x=30;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    
    
     /*`````````github`````````*/
     /* The title link starts a row and writes the site prefix plus the
        href.  The row is closed either by the placeholder text, when the
        result list follows directly, or by the match text copied with
        tags removed. */
    <M5>bBwPjs[ ]search-title\"><a[ ]href=\"[^\"]+ {
     CDp8iULPuCinsert(5);
     fwrite("https://github.com/",1,19,yyout);
     x=30;
     while(x<yyleng){putc(yytext[x],yyout);x++;}
     fwrite("','",1,3,yyout);
     }
    <M5>"</h3><ul class=\"Box-sc-" fwrite("[no description]');\n",1,20,yyout);
    <M5>Text-sc-17v1xeu-0[ ]kWPXhV[ ]search-match\"\> jmp M5_DESC;
    <M5_DESC>"</span>" fwrite("');\n",1,4,yyout);jmp M5;
    <M5_DESC><[^>]+>
    <M5_DESC>. {
     /* single (39) and double (34) quotes are written twice */
     switch(yytext[0])
     {
     case 39:
     case 34:
     putc(yytext[0],yyout);
     break;
     default:
     break;
     }
     putc(yytext[0],yyout);
     }
    
    
     /*```````````google``````````*/
     /* Two link forms start a row and write the url without its nine-byte
        "&amp;sa=" + letter tail: redirect links through /url?q= and links
        that start with h or f.  The title block is then copied byte by
        byte, with byte 0xE9 written as "e" (101), until the next "<",
        which closes the row.  Single quotes are doubled. */
    <M2>"<div class=\"egMi0 kCrYT\"><a href=\"/url?q="[^"\n\r ]+"&amp;sa=U" {
     CDp8iULPuCinsert(2);
     x=41;
     while(x<yyleng-9)
     {
     if(yytext[x]==39)putc(39,stdout);
     putc(yytext[x],yyout);
     x++;
     }
     fwrite("','",1,3,yyout);
     }
    <M2>"<div class=\"egMi0 kCrYT\"><a href=\""[hf][^"\n\r ]+"&amp;sa="[UX] {
     CDp8iULPuCinsert(2);
     x=34;
     while(x<yyleng-9)
     {
     if(yytext[x]==39)putc(39,stdout);
     putc(yytext[x],yyout);
     x++;
     }
     fwrite("','",1,3,yyout);
     }
    <M2>"<div class=\"BNeawe vvjwJb AP7Wnd\">" jmp M2_DESC;
    <M2_DESC>\< fwrite("');\n",1,4,yyout);jmp M2;
    <M2_DESC>\xE9 putc(101,yyout);
    <M2_DESC>[^\xE9<\n\r] {
     if(yytext[0]==39)putc(39,yyout);
     putc(yytext[0],yyout);
     }
    <M2>"<span dir=\"rtl\">"[^\xE9<\n\r]+ jmp M2_DESC;
    

To test:

    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [PAGE-OR-START]
    # Without SITE, the list of supported sites in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;2) # google
    shift
    yy084 2 "$x start=${1-1}"
    # the query goes into the url, so spaces are turned into "+" first
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si40A "" "https://www.google.com/search?q=$x&num=100&filter=0&start=${1-1}&scoring=d"
    ;;5) # github
    shift
    yy084 5 "$x p=${1-1}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://github.com/search?q=$x&type=&ref=advsearch&l=&p=${1-1}"
    ;;10) # f-droid
    shift
    yy084 10 "$x page=${1-1}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://search.f-droid.org/?q=$x&page=${1-1}"
    esac

    echo research|1.sh 2 > 2.serp
    echo research|1.sh 5 > 5.serp
    echo research|1.sh 10 > 10.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db


     /*````````crossref json`````````*/
     /* The next-cursor value goes to file descriptor 3 as one line, with
        "=" (61) written as %3D.  DOI and creation timestamp are stored in
        the "url" and "date" environment variables; the title then starts a
        row, writes the DOI url and is copied until its closing quote, with
        some \u escapes replaced by ASCII and markup removed. */
    <M25>\"next-cursor\":\"[^\"]+ {
     for(z=15;z<yyleng;z++)
     {
     if(yytext[z]==61)dprintf(3,"%s","%3D");
     if(yytext[z]!=61)dprintf(3,"%c",yytext[z]);
     }
     /* end the line on the descriptor the cursor was written to */
     dprintf(3,"\n");
     }
    <M25>\"DOI\":\"[^\"]+ {
     /* backslashes (92) are dropped; at most 255 bytes are kept */
     y=0;for(x=7;x<yyleng;x++)if(y<255)if(yytext[x]!=92)
     {yytext[y]=yytext[x];y++;};urllen=y;
     setenv("url",yytext,1);
     }
    <M25>\"created\":\{\"date-parts\":\[\[[0-9,]+\]\],\"date-time\":\"[0-9TZ:-]{20}+\",\"timestamp\":[^}]{10} {
     /* keep the last ten bytes of the match as the row date */
     y=0;for(x=yyleng-10;x<yyleng;x++){yytext[y]=yytext[x];y++;}
     setenv("date",yytext,1);
     datelen=y;
     }
    <M25>\"title\":\[\" {
     CDp8iULPuCinsert(25);
     fwrite("https://dx.doi.org/",1,19,yyout);
     fwrite(getenv("url"),1,urllen,yyout);
     fwrite("','",1,3,yyout);
     jmp M25_DESC;
     }
    <M25_DESC>\" fwrite("');\n",1,4,yyout);jmp M25;
    <M25_DESC>\\u200[35] putc(34,yyout);
    <M25_DESC>\\u201[cd] putc(34,yyout);
    <M25_DESC>\\u201[89] putc(39,yyout);putc(39,yyout);
    <M25_DESC>\\u201[340] putc(45,yyout);
    <M25_DESC>\\u2026 fwrite("...",1,3,yyout);
    <M25_DESC>\\u00e[124] putc(97,yyout);
    <M25_DESC>\<[^>]+\>
    <M25_DESC>\x27 putc(39,yyout);putc(39,yyout);
    <M25_DESC>[^\"\n] putc(yytext[0],yyout);
    
    
     /*````````crossref html`````````*/
     /* The text line after class="lead"> is held in desc[]; the
        showCiteBox link then writes the whole row, with the DOI url built
        from its first argument. */
    <M25>class=\"lead\">\n[ ]{6}[^\n]+ {
     /* z counts "<" (60) up and ">" (62) down, stepping over each of
        them; bytes are stored only while z is 0.
        NOTE(review): z is not set to 0 here, so this relies on the value
        left by earlier rules; confirm. */
     y=0;for(x=20;x<yyleng;x++)if(y<255)
     {
     if(yytext[x]==60){x++;z++;};
     if(yytext[x]==62){x++;z--;};
     if(!z)desc[y]=yytext[x];
     if(!z)y++;
     }  
     desclen=y;
     }
    <M25>javascript:showCiteBox\(\"[^\"]+ {
     CDp8iULPuCinsert(25);
     fwrite("https://dx.doi.org/",1,19,yyout);
     /* NOTE(review): the literal prefix of this pattern is 24 bytes, so
        starting at 25 skips the first matched byte; confirm intended */
     for(x=25;x<yyleng;x++)putc(yytext[x],yyout);
     fwrite("','",1,3,yyout);
     /* description with single quotes doubled */
     for(x=0;x<desclen;x++)
     {
     if(desc[x]==39)putc(39,yyout);
     putc(desc[x],yyout);
     }
     fwrite("');\n",1,4,yyout);
     }
    
To test:

    #!/bin/sh
    # Fetch crossref results and write them to stdout after the yy084
    # marker header.  The query is read from stdin.
    #   25a: search page from the web site; a copy is kept in 025.htm
    #   25b: the api urls found in 025.htm (run 25a first)
    #   25c: the api itself; pass a cursor file to get the following page
    # Without a site argument, the list of entries in this script is printed.

    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;25a) # crossref www
    shift
    {
    yy084 25 "$x page=${1-1}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    curl -k -si40 \
    -A "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.5112.79 Safari/537.36" \
    "https://search.crossref.org/search/works?q=$x&from_ui=yes&page=${1-1}" 
    } |sed w025.htm
    ;;25b) # crossref api from www
    # 25a writes 025.htm and the sed below reads 025.htm
    test -f 025.htm||exec echo run ${0##*/} 25a first
    shift
    {
    yy084 25 "$x page=${1-1}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    sed -n '/<a href=\"https:\/\/api.crossref.org\/works/{
    s/<a href=\"//;s/\">//;s/ */url=/p;}' 025.htm |curl -k -si40A "" -K/dev/stdin
    # faster, more efficient using HTTP/1.1 pipelining, e.g., 
    # yy030 < 025.htm|sed -n '/g\/works/p'|yy025|nc -vvn 127.7 80 
    }
    ;;25c) # crossref api
    shift
    yy084 25 "$x cursor=${1-*}";
    # one chain, so the first request is not followed by the usage line
    if test $# -eq 0;then
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    curl -si40A "" "https://api.crossref.org/works?query=$x&rows=1000&select=DOI,created,title&cursor=*" 
    # yy084 < 1.json 3>1.cursor|sqlite3 test.db
    elif test $# -eq 1;then
    read y < $1
    y=${y##*\"}
    curl -si40A "" "https://api.crossref.org/works?cursor=$y" 
    else exec echo usage: ${0##*/} 25c 1.cursor
    fi
    esac


    echo research|1.sh 25a > 25a.serp
    echo research|1.sh 25b > 25b.serp
    echo research|1.sh 25c > 25c.serp
    cat [0-9]*[a-c]*.serp|yy084|sqlite3 test.db


     /*````````aol`````````*/
     /* The result link starts a row and writes the url; the aria-label
        that follows a 13-digit hex attribute value closes it.  Single
        quotes are doubled in both fields. */
    <M26>"lh-24\" href=\""[^\"]+ {
     CDp8iULPuCinsert(26);
     for(x=13;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     /* NOTE(review): 44 is ",", appended to the url as before; confirm
        that this is wanted */
     putc(44,yyout);
     fwrite("','",1,3,yyout);
     }
    <M26>\"[a-f0-9]{13}\"[ ]aria-label=\"[^\"]+ {
     for(x=28;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
     fwrite("');\n",1,4,yyout);
     }
    
    
     /*`````````bing`````````*/
     /* <h2><a href=" starts a row.  M11_URL copies the url up to the
        `" h="` attribute, M11_DESC1 skips to the end of the tag, and
        M11_DESC2 copies the title until </h2>, which closes the row. */
    <M11>\<h2\>\<a[ ]href=\" {
     CDp8iULPuCinsert(11);
     jmp M11_URL;
     }
    <M11_URL>\"[ ]h=\" fwrite("','",1,3,yyout);jmp M11_DESC1;
    <M11_URL>[^\"]+ for(x=0;x<yyleng;x++){if(yytext[x]==39)putc(39,yyout);putc(yytext[x],yyout);};
    <M11_DESC1>\> jmp M11_DESC2;
    <M11_DESC1>\< jmp M11_URL;
    <M11_DESC2>"</h2>" fwrite("');\n",1,4,yyout);jmp M11; 
    <M11_DESC2>"</a>" 
    <M11_DESC2>"<strong>"|"</strong>"|"</a>" 
     /*
     example: less(1) shows hex <93>
     bvi \e28093
     E2 80 93
     \342 \200 \???
     printf "%o" 0x93
     223
     \342 \200 \223
     */
     /* The two lead bytes \342\200 are swallowed, and the byte after them
        is replaced by ASCII: 45 is "-", 39 "'", 34 a double quote, 46 ".".
        NOTE(review): utf8 is only counted and cleared, never tested, so
        the single-byte rules also match without the lead bytes; confirm. */
    <M11_DESC2>\342\200 utf8++;
    <M11_DESC2>\223|\224 putc(45,yyout);utf8=0;
    <M11_DESC2>\231 putc(39,yyout);putc(39,yyout);utf8=0;
    <M11_DESC2>\234 putc(34,yyout);putc(34,yyout);utf8=0;
    <M11_DESC2>\235 putc(34,yyout);putc(34,yyout);utf8=0;
    <M11_DESC2>\246 putc(46,yyout);putc(46,yyout);putc(46,yyout);utf8=0;
    <M11_DESC2>. if(yytext[0]==39)putc(39,yyout);fwrite(yytext,1,yyleng,yyout);
    
    
     /*```````````brave``````````*/
     /* One match holds both title and url.  The title, up to the first
        double quote, is held in desc[] (at most 255 bytes); y ends up on
        the last double quote, after which the url starts.  The row is
        written url first, then title, with single quotes doubled. */
    <M23>\{title:\"[^\"]+\",url:\"[^\"]+ {
     CDp8iULPuCinsert(23);
     y=0;
     z=0;
     x=8;
     while(x<yyleng)
     {
     if(yytext[x]==34)y=x;
     if(y==0&&z<255){desc[z]=yytext[x];z++;}
     x++;
     }
     x=y+1;
     while(x<yyleng)
     {
     putc(yytext[x],yyout);
     if(yytext[x]==39)putc(39,yyout); 
     x++;
     }
     fwrite("','",1,3,yyout);
     x=0;
     while(x<z)
     {
     if(desc[x]==39)putc(39,yyout); 
     putc(desc[x],yyout);
     x++;
     }
     if(z==255)fwrite(" [TRUNCATED]",1,12,yyout);
     fwrite("');\n",1,4,yyout);
     }
    
    
To test:

    #!/bin/sh
    # Fetch one result page and write it to stdout after the yy084 marker
    # header.  The query is read from stdin.
    #   usage: echo QUERY | 1.sh SITE [OFFSET]
    # Without SITE, the list of supported sites in this script is printed.
    test "$1"||exec sed -n '/^ *;;/p' "$0";
    read x;
    case $1 in :)
    ;;11) # bing
    shift
    yy084 11 "$x first=${1-1}"
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://www.bing.com/search?q=$x&cc=US&first=${1-1}"
    ;;23) # brave
    shift
    yy084 23 "$x offset=${1-0}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g');
    curl -si04A "" "https://search.brave.com/search?q=$x&source=web&offset=${1-0}"
    ;;26) # aol
    shift
    yy084 26 "$x b=${1-1}";
    x=$(echo "$x"|sed 's/ /+/g;s/++/+/g')
    curl -si40A "" "https://search.aol.com/search?q=$x&b=${1-1}"
    esac

    echo research|1.sh 11 > 11.serp
    echo research|1.sh 23 > 23.serp
    echo research|1.sh 26 > 26.serp
    cat [0-9]*.serp|yy084|sqlite3 test.db




Guidelines | FAQ | Lists | API | Security | Legal | Apply to YC | Contact

Search: