aboutsummarylogtreecommitdiffstats
path: root/wiki.pl
diff options
context:
space:
mode:
Diffstat (limited to 'wiki.pl')
-rw-r--r--wiki.pl69
1 files changed, 69 insertions, 0 deletions
diff --git a/wiki.pl b/wiki.pl
new file mode 100644
index 000000000000..afc9b2b4efc6
--- /dev/null
+++ b/wiki.pl
@@ -0,0 +1,69 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+use utf8;
+
+use MediaWiki::API;
+
+binmode STDOUT, ":utf8";
+
+my $mw = MediaWiki::API->new({ api_url => 'http://en.wikiquote.org/w/api.php' });
+
+# TODO fetch a specific, stable revision
+# TODO add series numbers + episode names to the end?
+# TODO actually scrape it into an array?
+
+my $page = $mw->get_page( { title => 'Farscape' } );
+
+if ($#ARGV == 0 && $ARGV[0] eq "revision") {
+ print $page->{ 'revid' };
+ exit;
+}
+
+if ($#ARGV > -1) {
+ die;
+}
+
+$_ = $page->{ '*' };
+
+# remove everything after, and including, the cast section
+s/== Cast ==.*$//s;
+
+# create seperators from horizontal rules, {,sub}section boundaries, quotes
+s/^\s*<hr.+?\/>\s*$/%/gim;
+
+s/^===.+?===$/%/gm;
+
+s/^==.+?==$/%/gm;
+
+s/^\s*\*\s*$/%/gm;
+
+# remove empty lines
+s/\n{2,}/\n/g;
+
+# collapse any multiple occurances of '%'
+s/%\n(%\n)+/%\n/g;
+
+# remove everything before, and including, the first '%'
+s/.*?%\n//s;
+
+# remove leading colons
+s/^://gm;
+
+# collapse bold and italic text.
+s/'''(.+?)'''/$1/g;
+
+s/''(.+?)''/$1/g;
+
+# replace wiki links with just the text
+s/\[\[(w:.+?\|)?(.+?)\]\]/$2/g;
+
+# remove trailing whitespace
+s/\s+$//gm;
+# remove preceding whitespace
+s/^\s+//gm;
+# collapse runs of spaces
+s/(\s)\s+/$1/gm;
+
+print;