t* cnn lite on gopher
       
   URI git clone git://git.codevoid.de/cnn-gopher.git
   DIR Log
   DIR Files
   DIR Refs
       ---
       tcnn-gopher.dcgi (3237B)
       ---
            1 #!/usr/local/bin/perl
            2 
            3 # Author:  Stefan Hagen <sh[at]codevoid[dot]de>
            4 # Web:     http://codevoid.de
            5 # Document License: ISC (see LICENSE file)
            6 
            7 use strict;
            8 use warnings;
            9 
           10 use XML::LibXML qw( );
           11 use LWP::UserAgent;
           12 use HTML::LinkExtractor;
           13 use HTML::Restrict;
           14 use HTML::Entities;
           15 use Text::Wrap;
           16 $Text::Wrap::columns=72;
           17 use Encode;
           18 # Banner printed before either view; single-quoted, so only \' and \\ are escapes.
           19 print '
           20                 .-\') _      .-\') _  
           21                ( OO ) )    ( OO ) ) 
           22    .-----. ,--./ ,--,\' ,--./ ,--,\'
           23   \'  .--./ |   \\ |  |\\ |   \\ |  |\\  
           24   |  |(\'-. |    \\|  | )|    \\|  | ) 
           25  /_) |OO  )|  .     |/ |  .     |/  
           26  ||  |`-\'| |  |\\    |  |  |\\    |   
           27 (_\'  \'--\'\\ |  | \\   |  |  | \\   |
           28    `-----\' `--\'  `--\'  `--\'  `--\'
           29 lite.cnn.io - on gopher - inofficial
           30 
           31 ';
           32 
           33 # Upstream site; the second command-line argument, when set, selects one article.
           34 my $protocol = "http";
           35 my $server   = "lite.cnn.io";
           36 my $uri      = $ARGV[1] ? "/en/article/$ARGV[1]" : "/en";
           37 
           38 # fetch data; all request state is kept in the $REST hash reference
           39 my $REST = { HOST => $server, URL => "$protocol://$server$uri" };
           40 $REST->{UA} = LWP::UserAgent->new(keep_alive => 0, timeout => 5);
           41 $REST->{UA}->agent("codevoid-cnn-lite-gopherproxy/0.1");
           42 $REST->{resource} = $REST->{URL};
           43 $REST->{request}  = HTTP::Request->new( GET => $REST->{resource} );
           44 $REST->{response} = $REST->{UA}->request( $REST->{request} );
           45 
           46 # Stop on a failed fetch (timeout, HTTP error) instead of rendering the
           47 # error body as if it were CNN content.
           48 if (!$REST->{response}->is_success) {
           49     print "Could not fetch $REST->{URL}: ", $REST->{response}->status_line, "\n";
           50     exit 1;
           51 }
           52 my $document = $REST->{response}->content;
           53 
           54 # Article view: turn the fetched HTML into wrapped plain text, print it, exit.
           55 if ($ARGV[1]) {
           56     my $HR = HTML::Restrict->new();
           57     # Drop what precedes <h1> on its line ('.' stops at newlines) and the footer link text.
           58     $document =~ s/.*(<h1>.*)/$1/g;
           59     $document =~ s/Go to the full CNN experience//g;
           60 
           61     # Replace some HTML elements with plain-text markers before stripping tags
           62     $document =~ s/<h1>/\n# /g;
           63     $document =~ s/<\/h1>/\n/g;
           64     $document =~ s/<div>/\n/g;
           65     $document =~ s/<\/div>/\n/g;
           66     $document =~ s/<p>/\n\n/g;
           67     $document =~ s/<li>/\n\n\* /g;
           68     $document =~ s/<blockquote>/\n\n--- QUOTE ---\n/g;
           69     $document =~ s/<\/blockquote>/\n---- END ----\n\n/g;
           70 
           71     # Strip remaining html (assign to the outer variable instead of shadowing it)
           72     $document = $HR->process($document);
           73 
           74     # htmldecode (quick fix - could be done properly), then wrap to $Text::Wrap::columns
           75     $document =~ s/&amp;/\&/gi;
           76     $document = wrap("","",$document)."\n";
           77 
           78     # fix geomyidae ^t design: expand tabs and double a 't' that starts a line
           79     $document =~ s/\t/    /g;
           80     $document =~ s/\nt/\ntt/g;
           81 
           82     # print, not printf: article text must never be interpreted as a format string
           83     print "\nARTICLE VIEW: \n\n";
           84     print decode_entities($document);
           85     print "\n[1| <- back to index|/cnn|server|port]";
           86     exit 0;
           87 }
           88 
           89 # Index view: search the fetched page for links and print them as a gopher menu.
           90 my $LX = HTML::LinkExtractor->new();
           91 $LX->strip(1);
           92 $LX->parse(\$document);
           93 
           94 # Loop over links, skip navigation entries and build one menu line per link.
           95 my $links = "";
           96 foreach my $link ($LX->links) {
           97     foreach my $linkitem (@$link) {
           98 
           99         # skip empty links (image links for example) and site navigation;
          100         # "Espa\xC3\xB1ol" is UTF-8 bytes - presumably the undecoded body is UTF-8
          101         if(!$linkitem->{_TEXT}) { next; }
          102         if($linkitem->{_TEXT} eq "CNN") { next; }
          103         if($linkitem->{_TEXT} eq "Espa\xC3\xB1ol") { next; }
          104         if($linkitem->{_TEXT} eq "Go to the full CNN experience") { next; }
          105 
          106         # extract link ID
          107         my $shortID = $linkitem->{href};
          108         $shortID =~ s|/en/article/||g;
          109 
          110         # add link to output scalar
          111         $links .= sprintf("[1| * %s|/cnn?%s|codevoid.de|70]\n", $linkitem->{_TEXT}, $shortID );
          112     }
          113 }
          114 
          115 # nobody needs more than one blank line.
          116 $links =~ s/\n\n(\n)*/\n\n/g;
          117 
          118 # fix geomyidae ^t design; '%' is left alone because the menu goes through print
          119 $links =~ s/&amp;/\&/gi;
          120 $links =~ s/\t/    /g;
          121 $links =~ s/\nt/\ntt/g;
          122 
          123 print "\nINDEX VIEW: \n\n";
          124 print $links;