t* hacker news on gopher
URI git clone git://git.codevoid.de/hn-gopher
DIR Log
DIR Files
DIR Refs
---
DIR commit d93b0ad18f02983bb67885550fa8258095545a34
DIR parent 1e75ee197e2cc218e808740151f74f1d3516cc87
URI Author: Stefan Hagen <sh+git[at]codevoid[dot]de>
Date: Mon, 30 Jul 2018 15:42:16 +0200

Consolidate ugly brute force regex

Diffstat:
M hn-scraper.pl | 19 +++----------------
1 file changed, 3 insertions(+), 16 deletions(-)
---
DIR diff --git a/hn-scraper.pl b/hn-scraper.pl
@@ -51,7 +51,7 @@ sub scrapeSubComments {
  my $author = encode("UTF-8", $comment->{'author'});
  my $objectID = $comment->{'objectID'};
  $output .= formatContent("$author:", $lvl);
- $output .= formatContent("$text", $lvl)."\n\n";
+ $output .= formatContent("$text", $lvl)."\n";
  $output .= scrapeSubComments( $payload, $objectID, ++$lvl );
  $lvl--;
  }
@@ -122,21 +122,8 @@ sub formatContent {
  $c++;
  $content_clean =~ s/(\Q$linkitem->{_TEXT}\E)/ \[$c\] /g;
- # FIXME FIXME FIXME
- # It's late and the below works.
- $content_clean =~ s/\n \[$c\] / \[$c\] /g;
- $content_clean =~ s/\n \[$c\] / \[$c\] /g;
-
- $content_clean =~ s/\[$c\] \n/ \[$c\] /g;
- $content_clean =~ s/\[$c\] \n/ \[$c\] /g;
-
- $content_clean =~ s/\n \[$c\] \n/ \[$c\] /g;
- $content_clean =~ s/\n \[$c\] \n/ \[$c\] /g;
-
- $content_clean =~ s/ / /g;
- $content_clean =~ s/ / /g;
- $content_clean =~ s/ / /g;
- # FIXME FIXME FIXME
+ # make sure there are no newlines/extra spaces around [0]
+ $content_clean =~ s/[\s\n]+\[$c\][\s\n]+/ \[$c\] /g;
  # shorten links
  my $short = $linkitem->{href};