t* fefes blog on gopher
       
   URI git clone git://git.codevoid.de/fefe-gopher
   DIR Log
   DIR Files
   DIR Refs
   DIR LICENSE
       ---
       tfefe.dcgi (3982B)
       ---
            1 #!/usr/local/bin/perl
            2 
            3 # Author:  Stefan Hagen <sh[at]codevoid[dot]de>
            4 # Web:     http://codevoid.de
            5 # Document License: ISC (see LICENSE file)
            6 
            7 use strict;
            8 use warnings;
            9 
           10 use XML::LibXML qw( );
           11 use LWP::UserAgent;
           12 use HTML::LinkExtractor;
           13 use HTML::Restrict;
           14 use Text::Wrap;
           15 $Text::Wrap::columns=72;
           16 use Encode;
           17 
           18 print '
           19   ___       __              ___  _            
           20  | __|___  / _| ___  ___   | _ )| | ___  __ _ 
           21  | _|/ -_)|  _|/ -_)(_-<   | _ \| |/ _ \/ _` |
           22  |_| \___||_|  \___|/__/   |___/|_|\___/\__, |
           23  On Gopher (inofficial)                 |___/ 
           24 -----------------------------------------------------------------------
           25 [h| Visit Fefes Blog on the Internet|URL:https://blog.fefe.de|codevoid.de|70]
           26 -----------------------------------------------------------------------
           27 
           28 
           29 ';
           30 
           31 # Fefes Blog Config
           32 my $protocol = "https";
           33 my $server   = "blog.fefe.de";
           34 my $uri      = "/rss.xml?html";
           35 
           36 # fetch data
           37 my $REST= ({HOST => "$server",
           38             URL  => "$protocol://$server$uri" });
           39 $REST->{UA} = LWP::UserAgent->new(keep_alive => 0, timeout => 5);
           40 $REST->{UA}->agent("codevoid-fefe-gopherproxy/0.1"); 
           41 $REST->{resource} = $REST->{URL};
           42 $REST->{request}  = HTTP::Request->new( GET => $REST->{resource} );
           43 $REST->{response} = $REST->{UA}->request( $REST->{request} );
           44 
           45 # parse data
           46 my $parser = XML::LibXML->new();
           47 my $document = $parser->parse_string($REST->{response}->content);
           48 my $root = $document->documentElement();
           49 
           50 # loop through items
           51 foreach my $channel ($root->findnodes('channel')) {
           52     foreach my $item ($channel->findnodes('item')) {
           53 
           54         # Encode to proper utf8
           55         my $description  = encode("UTF-8", $item->findvalue('description'));
           56         $description =~ s|<a href="/|<a href="https://blog.fefe.de/|ig;
           57 
           58         # Search for links
           59         my $LX = new HTML::LinkExtractor();
           60         $LX->strip(1);
           61         $LX->parse(\$description);
           62 
           63         # Replace some HTML elements
           64         my $HR = HTML::Restrict->new();
           65         $description =~ s/<p>/\n\n/g;
           66         $description =~ s/<li>/\n\n\* /g;
           67         $description =~ s/<blockquote>/\n\n--- QUOTE ---\n/g;
           68         $description =~ s/<\/blockquote>/\n---- END ----\n\n/g;
           69 
           70         # Strip remaining html
           71         my $description_clean = $HR->process($description);
           72 
           73         # htmldecode (quick fix - could be done properly)
           74         $description_clean =~ s/&amp;/\&/gi;
           75 
           76         # Loop at links, match text, add [counter] and generate output.
           77         my $c = 0;
           78         my $links = "";
           79         foreach my $link ($LX->links) {
           80             foreach my $linkitem (@$link) {
           81 
           82                 # skip empty links (image links for example)
           83                 if(!$linkitem->{_TEXT}) { next; }
           84 
           85                 $c++;
           86                 $description_clean =~ s/(\Q$linkitem->{_TEXT}\E)/$1\[$c]/g;
           87 
           88                 # shorten links
           89                 my $short = $linkitem->{href};
           90                 if(length($short) > 62) { $short = substr($short,0,62)." ..."; }
           91 
           92                 # add link to output scalar
           93                 $links .= sprintf("[h|[%i]: %s|URL:%s|codevoid.de|70]\n", $c, $short, $linkitem->{href});
           94             }   
           95         }
           96 
           97         # Wrap to 80 character width
           98         $description_clean = wrap("","",$description_clean)."\n";
           99 
          100         # nobody needs more that one newline.
          101         $description_clean =~ s/\n\n(\n)*/\n\n/g;
          102 
          103         # fix geomyidae ^t design
          104         $description_clean =~ s/\t/    /g;
          105         $description_clean =~ s/\nt/\ntt/g;
          106 
          107         # print!
          108         print $description_clean;
          109         # print links if there were any.
          110         if($links) {
          111             print "\nLinks:\n", $links;
          112         }
          113         print "\n***********************************************************************\n\n";
          114     }
          115 }
          116 print "[1|<- go back|/|codevoid.de|70]";
          117 
          118 
          119 # TODO:
          120 # * consolidate regex and call the regex parser only twice (before parsing => strip / replace html,
          121 #   after parsing => format output)
          122 # * proper htmldecode the output
          123 # * maybe ask fefe to include the dates into the feed.
          124 # * add source link somewhere