commit:0659caf85c8b8ba618e54c4ac64e7df32c00ed97
author:Chip Black
committer:Chip Black
date:Wed Jul 8 17:11:02 2015 -0500
parents:debf6bb842d610e500f1cd22a2320993feadd672
Add markov chain generator
diff --git a/README.md b/README.md
line changes: +15/-1
index b826075..3fef2f9
--- a/README.md
+++ b/README.md
@@ -2,7 +2,21 @@
 A simple markov chain bot
 
 ## Installation
-Requires `JSON`, `POE`, and `POE::Component::IRC`.
+Requires `JSON`, `POE`, and `POE::Component::IRC`, all of which are of course
+available from [CPAN](http://cpan.org).
 
 ## Configuration
 Copy config.pm.example to config.pm and edit to match your setup.
+WonderlandAlice does not support passwords, nickservs, SSL, or other advanced
+authentication mechanisms.  PRs welcome.
+
+## Creating your own markov chain
+`markov.pl` transforms an input file into a JSON structure defining the
+relationships between the words.  Use it like so:
+
+    $ perl markov.pl text > chain.json
+
+The bot loads the markov chain from chain.json when it starts up, so you will
+have to restart the bot to load new data.  `markov.pl` also accepts a `--js`
+flag that makes it output a JS snippet that loads the data into a `chain`
+variable.  It's intended for use when embedding the chain into a JS program.

diff --git a/markov.pl b/markov.pl
line changes: +64/-0
index 0000000..5f1c8a6
--- /dev/null
+++ b/markov.pl
@@ -0,0 +1,64 @@
+#!/usr/bin/perl
+use JSON;
+use Getopt::Long;
+use strict;
+
+my (%wordlist, @wordlist, %db, $js);
+
+GetOptions(
+	"js" => \$js,
+);
+
+my $lastword;
+my $c = 0;
+
+sub w_index {
+    my $word = shift;
+    if (!exists $wordlist{$word}) {
+        $wordlist{$word} = $c++;
+        push @wordlist, $word;
+    }
+    return $wordlist{$word};
+}
+
+while (<>) {
+    chomp;
+    s/^\s+//;
+    s/\s+$//;
+    s/--/ /g;
+    s/['"()\][<>*&^@~]//g;
+    my @words = split(/\s+/, $_);
+    # Make punctuation its own "word"
+    @words = map {
+        /^([\w-]+)([,.!?;:])$/ ?
+            ($1, $2) :
+            $_;
+    } @words;
+
+    for my $w (@words) {
+        my $wi = w_index(lc $w);
+        if (not defined $lastword) {
+            $lastword = $wi;
+            next;
+        }
+
+        $db{$lastword} = []
+            unless exists $db{$lastword};
+
+        push @{$db{$lastword}}, $wi;
+
+        $lastword = $wi;
+    }
+}
+
+if ($js) {
+	print "var chain = ";
+}
+
+print JSON->new->utf8->encode({wordlist => \@wordlist, relations => \%db});
+
+if ($js) {
+	print ";";
+}
+
+print "\n";