/Dreamlands/Dreamer.pm
package Dreamlands::Dreamer;
use v5.10;
use strict;
use warnings;

use Exporter 'import';
use DBI;
use Digest::SHA qw/sha1/;

our @EXPORT_OK = qw(seed generate_paragraph);

my $dbh = DBI->connect("dbi:SQLite:dbname=markov.db", "", "");

my $id_token_h = $dbh->prepare('SELECT token FROM tokens WHERE id = ?');

sub seed {
    my $text = shift;
    my $bits = sha1($text);
    my $n = unpack("L", $bits);
    srand($n);
}

sub get_token {
    my $tid = shift;
    $id_token_h->execute($tid);
    $id_token_h->fetchrow_array
}

sub random_select {
    $_[int(rand(@_))]
}

sub generate_paragraph {
    my %opts = @_;
    if (!defined $opts{sentences}) {
        $opts{sentences} = [2, 5];
    }
    if (!defined $opts{max_tokens}) {
        $opts{max_tokens} = 500;
    }

    my @words_out;
    my $glom_next = '';
    my @t;
    my $next_token_h = $dbh->prepare('SELECT next FROM seq2 WHERE t1=? AND t2=?');

    my $crank = sub {
        $next_token_h->execute(@t);
        my $next = random_select(@{$next_token_h->fetchall_arrayref})->[0];
        if (!defined $next) {
            $next = random_select(
                $dbh->selectall_array('SELECT next FROM seq2 WHERE t2=?', undef, $t[1])
            )->[0];
            if (!defined $next) {
                # uhhhh, punt
                @t = $dbh->selectrow_array('SELECT t1, t2, next FROM seq2 ORDER BY RANDOM() LIMIT 1');
                $next = shift @t;
            }
        }
        my $token = get_token($next);
        push @t, $next;
        shift @t;

        my $chomp = substr($token, 0, 1);
        if ($chomp eq '>') {
            $glom_next .= substr($token, 1);
        } elsif ($chomp eq '<') {
            if (@words_out == 0) {
                push @words_out, substr($token, 1);
            } else {
                $words_out[$#words_out] .= substr($token, 1);
            }
            $glom_next = '';
        } elsif ($chomp eq '|') {
            push @words_out, substr($token, 1);
            $glom_next = '';
        }
        return $token;
    };

    # So first we need to bootstrap a couple of plausible words. We start by
    # getting the token id for a terminal period.
    @t = $dbh->selectrow_array('SELECT id FROM tokens WHERE token = "<."');
    # Then seed our token sequence with it and a word that precedes it.
    @t = @{random_select(
        $dbh->selectall_array('SELECT t1, t2 FROM seq2 WHERE t2=?', undef, @t)
    )};

    my $target_sentences = int(rand($opts{sentences}->[1] - $opts{sentences}->[0])) + $opts{sentences}->[0];
    my $sentences = 0;
    my $token_count = 0;
    while ($token_count < $opts{max_tokens}) {
        my $token = &$crank;
        $token_count++;
        if ($token eq '<.' || $token eq '<!' || $token eq '<?') {
            $sentences++;
            last if $sentences == $target_sentences;
        }
    }

    return @words_out;
}

1;