# markov.pl
use JSON;
use Getopt::Long;
use strict;
# Shared state used while building the chain:
#   %wordlist  word => index
#   @wordlist  index => word
#   %db        word index => list of indexes of the words seen right after it
#   $js        true when --js was given
my (%wordlist, @wordlist, %db, $js);

# --js wraps the output in a JavaScript assignment ("var chain = ...;").
# GetOptions returns false for unknown or malformed options, so stop with a
# usage message instead of continuing.
GetOptions(
    "js" => \$js,
) or die "Usage: $0 [--js] [file ...]\n";

my $lastword;   # index of the previously seen word; undef until the first word
my $c = 0;      # next unused word index
# Return the integer index of a word, registering the word on first sight.
#
# A word not yet in %wordlist is given the next free index (taken from $c,
# which is then incremented) and appended to @wordlist, so that
# $wordlist[ w_index($word) ] eq $word.
#
# Takes:   the word (string)
# Returns: the word's index (integer, starting at 0)
sub w_index {
    my ($word) = @_;
    if (!exists $wordlist{$word}) {
        $wordlist{$word} = $c++;
        push @wordlist, $word;
    }
    return $wordlist{$word};
}
# Read the input line by line and record, for every word, which words
# followed it. Words are stored by their w_index() number. $lastword is not
# reset between lines, so the chain continues across line breaks.
while (<>) {
    chomp;
    s/--/ /g;                  # a double dash separates words
    s/['"()\][<>*&^@~]//g;     # drop quotes, brackets and stray symbols

    # split ' ' (awk mode) skips leading whitespace and drops trailing empty
    # fields. Splitting on /\s+/ would instead produce an empty first "word"
    # whenever the substitutions above leave a leading space (for example a
    # line that starts with "--" or "( "), and that empty string would end
    # up in the word list.
    my @words = split ' ', $_;

    # Make punctuation its own "word"
    @words = map {
        /^([\w-]+)([,.!?;:])$/ ?
            ($1, $2) :
            $_;
    } @words;

    for my $w (@words) {
        my $wi = w_index(lc $w);
        if (not defined $lastword) {
            # Very first word of the input: nothing precedes it yet.
            $lastword = $wi;
            next;
        }
        # push autovivifies the successor list the first time it is needed.
        push @{ $db{$lastword} }, $wi;
        $lastword = $wi;
    }
}
# Print the chain as a single line of JSON. With --js the JSON is wrapped in
# a JavaScript assignment: "var chain = ...;".
my %chain = (
    wordlist  => \@wordlist,
    relations => \%db,
);
print "var chain = " if $js;
print JSON->new->utf8->encode(\%chain);
print ";" if $js;
print "\n";