#!/usr/local/bin/perl

# Author: Stefan Hagen <sh[at]codevoid[dot]de>
# Web: http://codevoid.de
# Document License: ISC (see LICENSE file)
#
# cnn-gopher.dcgi - "cnn lite on gopher": a geomyidae dcgi script that
# proxies lite.cnn.io.  Without an article id it prints an index of links;
# with an id in $ARGV[1] it prints that article as wrapped plain text.
#
# Provenance: recovered from the "Initial Commit" diff
# (commit 2848ee81758bacaede370cd7c9045a1edbd859b5, Tue, 21 Aug 2018) of
# git://git.codevoid.de/cnn-gopher.git.

use strict;
use warnings;
use utf8;    # the skip list below contains a non-ASCII literal

use XML::LibXML qw( );
use LWP::UserAgent;
use HTML::LinkExtractor;
use HTML::Restrict;
use HTML::Entities;
use Text::Wrap;
$Text::Wrap::columns = 72;
# wrap() turns runs of spaces back into tabs unless told not to; tabs have
# to be kept out of the gph output.
$Text::Wrap::unexpand = 0;
use Encode;

# The page is handled as decoded characters, so encode once on the way out.
binmode STDOUT, ':encoding(UTF-8)';

# Make text safe to emit as geomyidae gph content: no tabs, and a leading
# "t" on a line is doubled ("fix geomyidae ^t design").  The /m anchor also
# covers a "t" at the very start of the text, which a "\nt" match misses.
sub gph_escape {
    my ($text) = @_;
    $text =~ tr/\t/ /;
    $text =~ s/^t/tt/mg;
    return $text;
}

# Convert an article page to wrapped plain text.
#   $html - the fetched article page
# Returns the text, already gph-escaped and ending in a newline.
sub render_article {
    my ($html) = @_;

    # Drop everything in front of the first headline; /s lets the prefix
    # span several lines.
    $html =~ s/\A.*?(?=<h1>)//s;
    $html =~ s/Go to the full CNN experience//g;

    # Replace some HTML elements with plain-text layout before the
    # remaining markup is stripped.
    $html =~ s/<h1>/\n# /g;
    $html =~ s/<\/h1>/\n/g;
    $html =~ s/<div>/\n/g;
    $html =~ s/<\/div>/\n/g;
    $html =~ s/<p>/\n\n/g;
    $html =~ s/<li>/\n\n* /g;
    $html =~ s/<blockquote>/\n\n--- QUOTE ---\n/g;
    $html =~ s/<\/blockquote>/\n---- END ----\n\n/g;

    # Strip remaining html
    my $text = HTML::Restrict->new()->process($html);
    $text = '' if !defined $text;

    # Decode entities before wrapping so line lengths are measured on the
    # text that is actually shown.
    $text = decode_entities($text);
    $text = wrap("", "", $text) . "\n";

    return gph_escape($text);
}

# Build the gph link list for the index page.
#   $html - the fetched index page
# Returns one "[1|...]" item per line.
sub render_index {
    my ($html) = @_;

    # Navigation links that are not articles.
    my %is_navigation = map { $_ => 1 }
        ( "CNN", "Español", "Go to the full CNN experience" );

    # Search for links
    my $LX = HTML::LinkExtractor->new();
    $LX->strip(1);
    $LX->parse( \$html );

    my $links     = "";
    my $extracted = $LX->links || [];
    foreach my $linkitem (@$extracted) {
        my $text = $linkitem->{_TEXT};
        my $href = $linkitem->{href};

        # skip empty links (image links for example)
        next if !$text;
        next if $is_navigation{$text};
        next if !defined $href;

        # extract link ID
        ( my $shortID = $href ) =~ s|/en/article/||g;

        # "|" separates gph fields, so it must be escaped in the
        # description.
        $text = decode_entities($text);
        $text =~ s/\|/\\|/g;

        # add link to output scalar
        $links .= sprintf( "[1| * %s|/cnn?%s|codevoid.de|70]\n",
            $text, $shortID );
    }

    # nobody needs more than one blank line.
    $links =~ s/\n{3,}/\n\n/g;

    return gph_escape($links);
}

# NOTE(review): the column alignment of this banner was reconstructed; the
# copy this file was recovered from had its whitespace collapsed. Confirm
# against the upstream repository.
print <<'BANNER';

                .-') _      .-') _
               ( OO ) )    ( OO ) )
   .-----. ,--./ ,--,' ,--./ ,--,'
  '  .--./ |   \ |  |\ |   \ |  |\
  |  |('-. |    \|  | )|    \|  | )
 /_) |OO  )|  .     |/ |  .     |/
 ||  |`-'| |  |\    |  |  |\    |
(_'  '--'\ |  | \   |  |  | \   |
   `-----' `--'  `--'  `--'  `--'
lite.cnn.io - on gopher - inofficial

BANNER

my $protocol = "http";
my $server   = "lite.cnn.io";

# $ARGV[1] carries the article id taken from the "/cnn?<id>" links that
# the index view generates.
my $article_id = $ARGV[1];
my $is_article = defined $article_id && length $article_id;

my $uri = "/en";
if ($is_article) {
    # The id comes from the client: percent-encode anything that is not
    # an unreserved URL character before it goes into the upstream URL.
    ( my $safe_id = $article_id )
        =~ s{([^A-Za-z0-9_.~-])}{ sprintf '%%%02X', ord $1 }ge;
    $uri = "/en/article/$safe_id";
}

# fetch data
my $url = "$protocol://$server$uri";
my $ua  = LWP::UserAgent->new( keep_alive => 0, timeout => 5 );
$ua->agent("codevoid-cnn-lite-gopherproxy/0.1");
my $response = $ua->get($url);

if ( !$response->is_success ) {
    # Say what went wrong instead of rendering an error body (or nothing)
    # as if it were the page.
    print "\nCould not fetch $url: ", $response->status_line, "\n";
    print "\n[1| <- back to index|/cnn|server|port]";
    exit 1;
}

my $document = $response->decoded_content;
$document = $response->content if !defined $document;

if ($is_article) {
    # print, not printf: the article text is data, never a format string.
    print "\nARTICLE VIEW: \n\n";
    print render_article($document);
    print "\n[1| <- back to index|/cnn|server|port]";
    exit 0;
}

print "\nINDEX VIEW: \n\n";
print render_index($document);