/youtube-subrip
#!/usr/bin/perl
#
# youtube-subrip - fetch a YouTube video's subtitle track and print it on
# STDOUT in SubRip (.srt) format.
#
# Usage: youtube-subrip <video-url-or-id> > subtitles.srt
#
# The script first asks the timedtext service for the list of tracks, picks
# the one flagged lang_default="true" (falling back to "en"), downloads that
# track and converts every <text start=".." dur=".."> element into one
# numbered SubRip cue.  Progress and errors go to STDERR so that STDOUT
# carries subtitle data only.

use strict;
use warnings;

use LWP::Simple;
use XML::Twig;

# Named entities understood by ent_decode(), besides numeric references.
my %ENTITY_CHAR = (
    quot => '"',
    amp  => '&',
    apos => "'",
    lt   => '<',
    gt   => '>',
);

# State filled in by the XML::Twig handlers below.  These are file-scoped
# lexicals and must be declared (and initialised) before any parse() call.
my %name_of_lang;       # lang_code => track name, for every listed track
my $default_lang;       # lang_code of the track flagged lang_default="true"
my $default_name;       # name of that same track
my $sub_count = 0;      # number of SubRip cues printed so far

my $url = shift;
if (!defined $url || $url eq '') {
    print STDERR "Usage: $0 <video-url-or-id>\n";
    exit 1;
}

# Accept either a watch URL containing v=ID or a bare video id.
my $id = $url =~ /v=([A-Za-z0-9_-]+)/ ? $1 : $url;

# Subtitle text may contain any Unicode character; always emit UTF-8.
binmode(STDOUT, ':encoding(UTF-8)');

my $list_xml = fetch_or_exit(
    "Fetching subtitle list...",
    'http://video.google.com/timedtext?hl=en&v=' . url_escape($id)
        . '&type=list',
);

# An empty body would make the XML parser die; treat it as "no tracks
# listed" and let the fallback below try the plain English track.
if ($list_xml =~ /\S/) {
    my $list_twig = XML::Twig->new( twig_roots => { 'track' => \&add_track } );
    $list_twig->parse($list_xml);
}

# Request the default track using its own language code and name.
my ($lang, $name);
if (defined $default_lang) {
    ($lang, $name) = ($default_lang, $default_name);
}
else {
    $lang = 'en';
    $name = $name_of_lang{en};
}
$name = '' unless defined $name;

my $track_xml = fetch_or_exit(
    "Fetching subtitles... ",
    'http://video.google.com/timedtext?hl=en&v=' . url_escape($id)
        . '&type=track&name=' . url_escape($name)
        . '&lang=' . url_escape($lang),
);

if ($track_xml !~ /\S/) {
    print STDERR "No subtitle data returned for language '$lang'\n";
    exit 1;
}

my $track_twig = XML::Twig->new( twig_roots => { 'text' => \&convert_text } );
$track_twig->parse($track_xml);

# fetch_or_exit($label, $request_url)
# Prints $label on STDERR, downloads $request_url and returns the body.
# Prints "Failed" and exits with status 1 when the download fails.
sub fetch_or_exit {
    my ($label, $request_url) = @_;

    print STDERR $label;
    my $content = get($request_url);
    if (!defined $content) {
        print STDERR "Failed\n";
        exit 1;
    }
    print STDERR "OK\n";
    return $content;
}

# url_escape($value)
# Returns $value percent-encoded (as UTF-8 bytes) for use inside a query
# string; only RFC 3986 unreserved characters are left untouched.
sub url_escape {
    my $value = shift;

    utf8::encode($value);
    $value =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord $1)/ge;
    return $value;
}

# add_track($twig, $elt)
# Twig handler for <track> elements of the track list: records the track
# name per language code and remembers the track flagged as default.
sub add_track {
    my ($twig, $elt) = @_;

    my $code = $elt->att('lang_code');
    return 1 unless defined $code;

    $name_of_lang{$code} = $elt->att('name');

    # The attribute is absent on non-default tracks, so guard against undef.
    my $is_default = $elt->att('lang_default');
    if (defined $is_default && $is_default eq 'true') {
        $default_lang = $code;
        $default_name = $elt->att('name');
    }
    return 1;
}

# ent_decode($text)
# Decodes numeric (&#39; / &#x27;) and the five predefined named entities
# that remain in the text after XML parsing.  Everything is replaced in a
# single pass so that e.g. "&amp;lt;" becomes "&lt;" and not "<".
sub ent_decode {
    my $text = shift;
    return '' unless defined $text;

    $text =~ s{
        & (?: \#(\d+) | \#[xX]([0-9A-Fa-f]+) | (quot|amp|apos|lt|gt) ) ;
    }{
        defined $1 ? chr($1)
      : defined $2 ? chr(hex $2)
      :              $ENTITY_CHAR{$3}
    }gex;

    return $text;
}

# timecode($seconds)
# Formats a (possibly fractional) number of seconds as a SubRip timestamp
# "HH:MM:SS,mmm".
sub timecode {
    my $n = shift;

    # Round to whole milliseconds first: (2.3 - 2) * 1000 is 299.99... in
    # floating point and would otherwise be truncated to 299.
    my $total_ms = int($n * 1000 + 0.5);
    my $ms       = $total_ms % 1000;
    my $total_s  = int($total_ms / 1000);

    my $s = $total_s % 60;
    my $m = int($total_s / 60) % 60;
    my $h = int($total_s / 3600);

    return sprintf("%02d:%02d:%02d,%03d", $h, $m, $s, $ms);
}

# convert_text($twig, $elt)
# Twig handler for <text> elements of a subtitle track: prints one SubRip
# cue (index, time range, decoded text, blank line) and then purges the
# element so memory use stays flat on long tracks.
sub convert_text {
    my ($twig, $elt) = @_;

    my $start = $elt->att('start');
    my $dur   = $elt->att('dur');
    $start = 0 unless defined $start;
    $dur   = 0 unless defined $dur;

    $sub_count++;
    print "$sub_count\n";
    print timecode($start), ' --> ', timecode($start + $dur), "\n";
    print ent_decode($elt->first_child_text);
    print "\n\n";

    $elt->purge();
    return 1;
}