/youtube-subrip
#!/usr/bin/perl
use LWP::Simple;
use XML::Twig;
use strict;
# ---------------------------------------------------------------------------
# Step 1: determine the video id and download the list of subtitle tracks.
# ---------------------------------------------------------------------------

# The single command-line argument is either a URL containing "v=<id>" or a
# bare video id.
my $url = shift;
unless (defined $url && length $url) {
    # Without an argument the request below would be sent with an empty
    # video id; stop early with a usage hint instead.
    print STDERR "Usage: $0 <youtube-url-or-video-id>\n";
    exit 1;
}

# Capture in list context so that a failed match yields undef instead of
# depending on whatever $1 happens to hold; fall back to the raw argument.
my ($id) = $url =~ /v=([A-Za-z0-9_-]+)/;
$id = $url unless defined $id;

# File-level state shared with the subs and the top-level code further down:
#   $xml          - body of the most recent HTTP response
#   $t            - twig used to parse the track list
#   %langs        - language code => track name (filled by add_track)
#   $lang_default - code of the track flagged lang_default="true"
#   $name         - name of the track that gets downloaded
my ($xml, $t, %langs, $lang_default, $name);

print STDERR "Fetching subtitle list...";
$xml = get("http://video.google.com/timedtext?hl=en&v=$id&type=list");
if (defined $xml) {
    print STDERR "OK\n";
} else {
    # LWP::Simple::get returns undef when the request fails.
    print STDERR "Failed\n";
    exit 1;
}

# Only <track> elements are of interest; each one is passed to add_track.
$t = XML::Twig->new(
    twig_roots => { 'track' => \&add_track }
);
$t->parse($xml);
# Twig handler for the 'track' elements of the track list.
#
# Stores the track's name in the file-level %langs under its language code
# and, when the element carries lang_default="true", records that code in
# the file-level $lang_default.
#
#   $twig  - the XML::Twig object driving the parse (unused)
#   $track - the 'track' element being handled
sub add_track {
    my ($twig, $track) = @_;

    my $code = $track->att('lang_code');
    $langs{$code} = $track->att('name');

    $lang_default = $code if $track->att('lang_default') eq 'true';
}
# ---------------------------------------------------------------------------
# Step 2: download one subtitle track and print it as SubRip on STDOUT.
# ---------------------------------------------------------------------------

# Request the track that add_track flagged as default.  When no track was
# flagged, fall back to English, which is what the request always asked for
# before.  The name and the language sent to the service must describe the
# same track, so both are derived from the same code.
my $lang = defined $lang_default ? $lang_default : 'en';
$name = $langs{$lang};
$name = '' unless defined $name;

# Percent-encode a value for use in the query string: everything except the
# RFC 3986 "unreserved" characters is escaped, working on UTF-8 bytes.
my $escape = sub {
    my $value = shift;
    utf8::encode($value);
    $value =~ s/([^A-Za-z0-9_.~-])/sprintf('%%%02X', ord $1)/ge;
    return $value;
};

print STDERR "Fetching subtitles... ";
$xml = get('http://video.google.com/timedtext?hl=en&v=' . $id
    . '&type=track&name=' . $escape->($name)
    . '&lang=' . $escape->($lang));
if (defined $xml) {
    print STDERR "OK\n";
} else {
    # LWP::Simple::get returns undef when the request fails.
    print STDERR "Failed\n";
    exit 1;
}

# Subtitle text can contain characters outside Latin-1; give STDOUT an
# explicit UTF-8 layer so every cue is written in the same encoding.
binmode STDOUT, ':encoding(UTF-8)';

# Running cue number printed by convert_text.  It has to be initialised
# before the parse below, because the handler increments it while parsing.
my $sub_count = 0;

# Each <text> element is one cue and is passed to convert_text.  A separate
# variable is used so the track-list twig declared earlier is not redeclared.
my $subtitle_twig = XML::Twig->new(
    twig_roots => { 'text' => \&convert_text }
);
$subtitle_twig->parse($xml);
# Decode character references that are still present in a piece of text.
#
# Handles decimal references (ampersand, '#', digits, ';'), hexadecimal
# references (ampersand, '#x', hex digits, ';') and the five predefined XML
# entities named quot, amp, apos, lt and gt.  Anything else, including a
# lone ampersand or an unknown entity name, is left untouched.
#
# All references are replaced in one pass over the string.  Decoding them
# one kind after another would decode twice whenever an escaped ampersand is
# followed by an entity name: the first substitution would create a new
# reference for a later substitution to pick up.
#
#   $text   - the string to decode
#   returns - the decoded string
sub ent_decode {
    my ($text) = @_;

    # Entity name => the character it stands for.
    my %char_for = (
        quot => '"',
        amp  => '&',
        apos => "'",
        lt   => '<',
        gt   => '>',
    );

    # $1 = decimal code point, $2 = hexadecimal code point, $3 = entity name.
    $text =~ s{&(?:\#(\d+)|\#[xX]([0-9A-Fa-f]+)|(quot|amp|apos|lt|gt));}
              {defined $1 ? chr($1) : defined $2 ? chr(hex $2) : $char_for{$3}}ge;

    return $text;
}
# Format a time offset in (possibly fractional) seconds as a SubRip
# timestamp of the form "HH:MM:SS,mmm".
#
# The offset is rounded to a whole number of milliseconds first and every
# field is derived from that integer.  Truncating the fractional part on its
# own drops a millisecond whenever binary floating point stores the value
# slightly low (4.35 would come out as ",349"), and rounding the fraction on
# its own could reach 1000 without carrying into the seconds field.
#
# Negative offsets are not meaningful for subtitles and get no special
# treatment.
#
#   $seconds - offset from the start of the video, in seconds
#   returns  - the formatted timestamp string
sub timecode {
    my $seconds = shift;

    # Round half up to the nearest millisecond.
    my $total_ms = int($seconds * 1000 + 0.5);

    my $millis        = $total_ms % 1000;
    my $whole_seconds = int($total_ms / 1000);

    my $s = $whole_seconds % 60;
    my $m = int($whole_seconds / 60) % 60;
    my $h = int($whole_seconds / 3600);

    return sprintf("%02d:%02d:%02d,%03d", $h, $m, $s, $millis);
}
# Twig handler for the 'text' elements of a subtitle track: prints one cue
# to STDOUT in SubRip layout.
#
# The cue consists of the running cue number (file-level $sub_count,
# incremented here), a "start --> end" line built with timecode(), the
# entity-decoded text of the element's first child, and a blank line.  The
# end time is the 'start' attribute plus the 'dur' attribute.  The element
# is purged afterwards.
#
#   $twig - the XML::Twig object driving the parse (unused)
#   $cue  - the 'text' element being handled
sub convert_text {
    my ($twig, $cue) = @_;

    my $begin  = $cue->att('start');
    my $finish = $begin + $cue->att('dur');
    my $number = ++$sub_count;

    print "$number\n",
          timecode($begin), ' --> ', timecode($finish), "\n",
          ent_decode($cue->first_child_text),
          "\n\n";

    $cue->purge();
}