t* fefes blog on gopher URI git clone git://git.codevoid.de/fefe-gopher DIR Log DIR Files DIR Refs DIR LICENSE --- tfefe.dcgi (3982B) --- 1 #!/usr/local/bin/perl 2 3 # Author: Stefan Hagen <sh[at]codevoid[dot]de> 4 # Web: http://codevoid.de 5 # Document License: ISC (see LICENSE file) 6 7 use strict; 8 use warnings; 9 10 use XML::LibXML qw( ); 11 use LWP::UserAgent; 12 use HTML::LinkExtractor; 13 use HTML::Restrict; 14 use Text::Wrap; 15 $Text::Wrap::columns=72; 16 use Encode; 17 18 print ' 19 ___ __ ___ _ 20 | __|___ / _| ___ ___ | _ )| | ___ __ _ 21 | _|/ -_)| _|/ -_)(_-< | _ \| |/ _ \/ _` | 22 |_| \___||_| \___|/__/ |___/|_|\___/\__, | 23 On Gopher (inofficial) |___/ 24 ----------------------------------------------------------------------- 25 [h| Visit Fefes Blog on the Internet|URL:https://blog.fefe.de|codevoid.de|70] 26 ----------------------------------------------------------------------- 27 28 29 '; 30 31 # Fefes Blog Config 32 my $protocol = "https"; 33 my $server = "blog.fefe.de"; 34 my $uri = "/rss.xml?html"; 35 36 # fetch data 37 my $REST= ({HOST => "$server", 38 URL => "$protocol://$server$uri" }); 39 $REST->{UA} = LWP::UserAgent->new(keep_alive => 0, timeout => 5); 40 $REST->{UA}->agent("codevoid-fefe-gopherproxy/0.1"); 41 $REST->{resource} = $REST->{URL}; 42 $REST->{request} = HTTP::Request->new( GET => $REST->{resource} ); 43 $REST->{response} = $REST->{UA}->request( $REST->{request} ); 44 45 # parse data 46 my $parser = XML::LibXML->new(); 47 my $document = $parser->parse_string($REST->{response}->content); 48 my $root = $document->documentElement(); 49 50 # loop through items 51 foreach my $channel ($root->findnodes('channel')) { 52 foreach my $item ($channel->findnodes('item')) { 53 54 # Encode to proper utf8 55 my $description = encode("UTF-8", $item->findvalue('description')); 56 $description =~ s|<a href="/|<a href="https://blog.fefe.de/|ig; 57 58 # Search for links 59 my $LX = new HTML::LinkExtractor(); 60 $LX->strip(1); 61 $LX->parse(\$description); 62 63 # Replace some HTML elements 64 my $HR = HTML::Restrict->new(); 65 $description =~ s/<p>/\n\n/g; 66 $description =~ s/<li>/\n\n\* /g; 67 $description =~ s/<blockquote>/\n\n--- QUOTE ---\n/g; 68 $description =~ s/<\/blockquote>/\n---- END ----\n\n/g; 69 70 # Strip remaining html 71 my $description_clean = $HR->process($description); 72 73 # htmldecode (quick fix - could be done properly) 74 $description_clean =~ s/&/\&/gi; 75 76 # Loop at links, match text, add [counter] and generate output. 77 my $c = 0; 78 my $links = ""; 79 foreach my $link ($LX->links) { 80 foreach my $linkitem (@$link) { 81 82 # skip empty links (image links for example) 83 if(!$linkitem->{_TEXT}) { next; } 84 85 $c++; 86 $description_clean =~ s/(\Q$linkitem->{_TEXT}\E)/$1\[$c]/g; 87 88 # shorten links 89 my $short = $linkitem->{href}; 90 if(length($short) > 62) { $short = substr($short,0,62)." ..."; } 91 92 # add link to output scalar 93 $links .= sprintf("[h|[%i]: %s|URL:%s|codevoid.de|70]\n", $c, $short, $linkitem->{href}); 94 } 95 } 96 97 # Wrap to 80 character width 98 $description_clean = wrap("","",$description_clean)."\n"; 99 100 # nobody needs more that one newline. 101 $description_clean =~ s/\n\n(\n)*/\n\n/g; 102 103 # fix geomyidae ^t design 104 $description_clean =~ s/\t/ /g; 105 $description_clean =~ s/\nt/\ntt/g; 106 107 # print! 108 print $description_clean; 109 # print links if there were any. 110 if($links) { 111 print "\nLinks:\n", $links; 112 } 113 print "\n***********************************************************************\n\n"; 114 } 115 } 116 print "[1|<- go back|/|codevoid.de|70]"; 117 118 119 # TODO: 120 # * consolidate regex and call the regex parser only twice (before parsing => strip / replace html, 121 # after parsing => format output) 122 # * proper htmldecode the output 123 # * maybe ask fefe to include the dates into the feed. 124 # * add source link somewhere