#!/usr/local/bin/perl

# cnn-gopher.dcgi - "cnn lite on gopher"
#
# Gopher CGI that proxies the text-only CNN site (lite.cnn.io).
# Source: git clone git://git.codevoid.de/cnn-gopher.git
#
# Author: Stefan Hagen <sh[at]codevoid[dot]de>
# Web: http://codevoid.de
# Document License: ISC (see LICENSE file)
#
# Usage:
#   Called without a second argument, the script fetches /en and prints
#   one gopher link per headline (INDEX VIEW).
#   Called with a second argument ($ARGV[1]), that value is used as the
#   article id: /en/article/<id> is fetched, stripped of HTML, wrapped to
#   72 columns and printed (ARTICLE VIEW).
#   NOTE(review): the index emits links of the form "/cnn?<id>", so the
#   gopher server presumably hands the part after "?" over as $ARGV[1] -
#   confirm against the server's CGI calling convention.

use strict;
use warnings;

use XML::LibXML qw( );
use LWP::UserAgent;
use HTML::LinkExtractor;
use HTML::Restrict;
use HTML::Entities;
use Text::Wrap;
$Text::Wrap::columns=72;
use Encode;

# All text below is handled as decoded characters (see _fetch_document),
# so it has to be encoded exactly once, on the way out.
binmode(STDOUT, ':encoding(UTF-8)');

# Banner. A non-interpolating heredoc keeps the quotes and backslashes of
# the ASCII art readable without escaping.
print <<'BANNER';

               .-') _      .-') _
              ( OO ) )    ( OO ) )
   .-----. ,--./ ,--,' ,--./ ,--,'
  '  .--./ |   \ |  |\ |   \ |  |\
  |  |('-. |    \|  | )|    \|  | )
 /_) |OO  )|  .     |/ |  .     |/
 ||  |`-'| |  |\    |  |  |\    |
(_'  '--'\ |  | \   |  |  | \   |
   `-----' `--'  `--'  `--'  `--'
lite.cnn.io - on gopher - inofficial

BANNER

# Fetch $url and return its body as a character string.
# On any HTTP failure an error line is printed and the script exits with
# status 1, so an error page is never parsed as if it were content.
sub _fetch_document {
    my ($url) = @_;

    my $ua = LWP::UserAgent->new(keep_alive => 0, timeout => 5);
    $ua->agent("codevoid-cnn-lite-gopherproxy/0.1");

    my $response = $ua->get($url);
    if (!$response->is_success) {
        print "Error: could not fetch $url (", $response->status_line, ")\n";
        exit 1;
    }

    # decoded_content() honours the charset announced by the server; it
    # returns undef when decoding fails, in which case assume UTF-8.
    my $document = $response->decoded_content;
    $document = decode('UTF-8', $response->content) unless defined $document;
    return $document;
}

# Adapt text to the server's menu format ("fix geomyidae ^t design"):
# tabs are replaced by a space and a "t" at the start of any line is
# doubled. NOTE(review): presumably geomyidae strips one leading "t" from
# a line - confirm against its documentation.
sub _gph_escape {
    my ($text) = @_;
    $text =~ s/\t/ /g;
    $text =~ s/^t/tt/mg;    # /m: also covers the very first line
    return $text;
}

# Print the ARTICLE VIEW for the HTML in $document.
sub _print_article {
    my ($document) = @_;

    # Cut whatever precedes the headline. "." does not match newlines, so
    # this only trims text sharing a line with <h1> (kept as in the
    # original).
    $document =~ s/.*(<h1>.*)/$1/g;
    $document =~ s/Go to the full CNN experience//g;

    # Turn the structural tags we care about into plain-text layout before
    # the remaining markup is stripped. Applied in order.
    my @rewrites = (
        [ qr{<h1>}          => "\n# " ],
        [ qr{</h1>}         => "\n" ],
        [ qr{<div>}         => "\n" ],
        [ qr{</div>}        => "\n" ],
        [ qr{<p>}           => "\n\n" ],
        [ qr{<li>}          => "\n\n* " ],
        [ qr{<blockquote>}  => "\n\n--- QUOTE ---\n" ],
        [ qr{</blockquote>} => "\n---- END ----\n\n" ],
    );
    for my $rewrite (@rewrites) {
        my ($pattern, $replacement) = @{$rewrite};
        $document =~ s/$pattern/$replacement/g;
    }

    # Strip remaining html.
    $document = HTML::Restrict->new()->process($document);
    $document = '' unless defined $document;

    # Decode entities *before* wrapping so line lengths are measured on
    # the characters that are actually shown.
    $document = decode_entities($document);

    # Escape after wrapping, because wrapping moves line starts around.
    $document = _gph_escape(wrap("", "", $document) . "\n");

    # Plain print: the text comes from the network and must never be used
    # as a printf format string.
    print "\nARTICLE VIEW: \n\n";
    print $document;
    print "\n[1| <- back to index|/cnn|server|port]";
    return;
}

# Print the INDEX VIEW: one gopher link per headline found in $document.
sub _print_index {
    my ($document) = @_;

    my $extractor = HTML::LinkExtractor->new();
    $extractor->strip(1);
    $extractor->parse(\$document);

    # Navigation links that are not headlines.
    my %skip_title = map { $_ => 1 } (
        "CNN",
        "Espa\x{F1}ol",    # "Español"; escaped so the source stays ASCII
        "Go to the full CNN experience",
    );

    my $links = "";
    # Same nested iteration as the original: links() is looped over and
    # each element is dereferenced as a list of link hashes.
    foreach my $link ($extractor->links) {
        foreach my $linkitem (@$link) {

            # skip empty links (image links for example)
            my $title = $linkitem->{_TEXT};
            next unless defined $title && length $title;

            $title = decode_entities($title);
            next if $skip_title{$title};

            # no href means there is nothing to link to
            my $href = $linkitem->{href};
            next unless defined $href;

            # extract link ID
            (my $short_id = $href) =~ s{/en/article/}{}g;

            $links .= sprintf("[1| * %s|/cnn?%s|codevoid.de|70]\n",
                              $title, $short_id);
        }
    }

    # Nobody needs more than one blank line in a row.
    $links =~ s/\n\n(\n)*/\n\n/g;

    print "\nINDEX VIEW: \n\n";
    print _gph_escape($links);
    return;
}

my $protocol = "http";
my $server   = "lite.cnn.io";

my $article_id  = $ARGV[1];
my $want_article = defined $article_id && length $article_id;

my $uri = $want_article ? "/en/article/$article_id" : "/en";

# fetch data
my $document = _fetch_document("$protocol://$server$uri");

if ($want_article) {
    _print_article($document);
}
else {
    _print_index($document);
}

exit 0;