commit:a3ca9a9933325f5341c2227c2565b5e5b015a064
author:Norris
committer:Norris
date:Fri Dec 4 13:27:53 2009 -0800
parents:cbc6833cf03753910a97352884cbd825ee697044
Add the capability to use basic HTML by filtering
diff --git a/Vector/HtmlFilter.pm b/Vector/HtmlFilter.pm
line changes: +83/-0
index 0000000..d0e75e9
--- /dev/null
+++ b/Vector/HtmlFilter.pm
@@ -0,0 +1,83 @@
+package Vector::HtmlFilter;
+use strict;
+
+# Whitelist consulted by htmlfilter(): tag name => hash of the attribute
+# names that tag may keep.  An empty hash allows the tag itself but none of
+# its attributes.
+my %html = (
+	# Tags currently allowed, all of them without attributes
+	( map { ( $_ => {} ) } qw(b i u big code pre s small sub sup) ),
+
+	# Disabled entries, kept here so they can be switched back on
+	#a => { href => 1 },
+	#img => { src => 1, width => 1, height => 1, alt => 1 },
+	#hr => {},
+	#blockquote => {},
+	#table => { width => 1, border => 1 },
+	#tr => {},
+	#td => { align => 1 },
+	#abbr => {},
+	#acronym => {},
+	#br => {},
+	#cite => {},
+	#em => {},
+	#ol => {},
+	#ul => {},
+	#li => {},
+	#p => {},
+);
+
+# Regex alternations of tag names that htmlfilter() replaces together with
+# everything between the opening and the closing tag: $nuke elements become
+# a "[... tag nuked]" placeholder, $oingoboingo elements become a fixed link.
+my ($nuke, $oingoboingo) = ('script', 'applet|embed|object');
+
+# Sanitize user-supplied HTML in place.
+#
+# Takes a reference to the string to filter.  The referenced string is
+# modified directly; the return value carries no information.  Steps:
+#   1. Elements named in $nuke are replaced, contents included, by a
+#      "[<name> tag nuked]" placeholder.
+#   2. Elements named in $oingoboingo are replaced, contents included, by a
+#      fixed paragraph containing a link.
+#   3. Every remaining tag is looked up in %html.  Whitelisted tags are
+#      rewritten with only their whitelisted attributes; any other tag is
+#      deleted (the text it enclosed stays).  A '<' that does not begin a
+#      complete tag is escaped to '&lt;'.
+# Tag and attribute names are matched case-insensitively, and whitelisted
+# tags are emitted in lower case.
+#
+# Dies with "Too many tags" once 10000 whitelisted tags have been rewritten.
+sub htmlfilter($) {
+	my $str = shift;
+	my $c = 0;
+
+	return unless defined $$str;
+
+	# Nuke dangerous tags together with everything they enclose
+	$$str =~ s#<($nuke)[^>]*>.*?</\1\s*>#[$1 tag nuked]#gis;
+
+	# Swap embedded-content elements for a fixed link
+	$$str =~ s#<($oingoboingo)[^>]*>.*?</\1\s*>#<p><a href="http://www.youtube.com/watch?v=jItz-uNjoZA">I love little girls</a>.#gis;
+
+	# Filter problematic tags and attributes.  Capture 1 is either a whole
+	# tag or, when no complete tag starts at this '<', just the '<' itself.
+	pos($$str) = 0;
+	while ($$str =~ m{\G[^<]*(<(?:(/?)(\w+)([^>]*)>)?)}gs) {
+		# Copy the match data right away; later matches overwrite it
+		my $start = $-[1];
+		my $len = $+[1] - $start;
+		my ($end, $tag, $rest) = ($2, $3, $4);
+		my $rtag;
+
+		if (!defined $tag) {
+			# Stray '<' (as in "1 < 2", or a tag with no closing '>').
+			# Escape it so it cannot open a tag in the output, and keep
+			# scanning instead of leaving the rest of the string unfiltered.
+			$rtag = '&lt;';
+		} elsif (my $allowed = $html{lc($tag)}) {
+			my @oattrs = ();
+			for my $attr (split ' ', $rest) {
+				# Limit of 2 keeps any '=' inside the value intact
+				my ($atag, $value) = split /=/, $attr, 2;
+				$atag = lc $atag;
+				next if ($atag eq 'href' || $atag eq 'src')
+					&& defined $value && $value =~ /javascript:/i;
+				push @oattrs, $attr if $allowed->{$atag};
+			}
+			$rtag = join(' ', "<$end" . lc($tag), @oattrs) . '>';
+			$c++;
+		} else {
+			# Tag is not whitelisted: delete it
+			$rtag = '';
+		}
+
+		substr($$str, $start, $len, $rtag);
+		# Modifying the string resets pos(), so point \G just past the
+		# replacement; otherwise the scan would start over from offset 0
+		pos($$str) = $start + length($rtag);
+		die "Too many tags" if $c >= 10000;
+	}
+	return;
+}
+
+# Turn simple HTML markup into plain-text conventions, in place.
+#
+# Takes a reference to the string to convert.  <b>, <i> and <u> tags
+# (opening or closing, any letter case) become '*', '/' and '_'; every
+# other tag is removed and its text content is kept.
+sub textify {
+	my $str = shift;
+
+	# The \b stops these from also swallowing longer tag names that merely
+	# begin with the same letter (<big>, <br>, <img>, <ul>, ...)
+	$$str =~ s{</?b\b[^>]*>}{*}gi;
+	$$str =~ s{</?i\b[^>]*>}{/}gi;
+	$$str =~ s{</?u\b[^>]*>}{_}gi;
+
+	# Whatever markup is left is simply dropped
+	$$str =~ s{<[^>]*>}{}g;
+}
+
+1;

diff --git a/Vector/Notify.pm b/Vector/Notify.pm
line changes: +3/-0
index 0f99299..782b7c3
--- a/Vector/Notify.pm
+++ b/Vector/Notify.pm
@@ -4,6 +4,7 @@ use Vector::DB;
 use Vector::Util;
 use Vector::Channel;
 use Vector::User;
+use Vector::HtmlFilter;
 use strict;
 
 sub get {
@@ -70,6 +71,8 @@ sub queue {
 	my $sth;
 	my %watchers;
 
+	Vector::HtmlFilter::textify \$message;
+
 	# Queue for everyone watching this thread
 	$sth = $dbh->prepare('SELECT users.user_id, users.email FROM watch LEFT JOIN users ON users.user_id = watch.user_id WHERE channel_id = ? AND thread = ?');
 	$sth->execute($channel_id, $thread);

diff --git a/Vector/Post.pm b/Vector/Post.pm
line changes: +6/-1
index 6a81288..a0ff21a
--- a/Vector/Post.pm
+++ b/Vector/Post.pm
@@ -8,11 +8,14 @@ use Vector::Util qw/simplify_uri xmlescape/;
 use Vector::Config qw/$webroot $thumbdir $datadir/;
 use Vector::Notify;
 use Vector::ReplyTag;
+use Vector::HtmlFilter;
 use strict;
 
 sub new {
 	my ($class, $user_id, $channel_id, $data, $file, $replyto) = @_;
 
+	Vector::HtmlFilter::htmlfilter \$data;
+
 	my $self = {
 		type => 'post',
 		user_id => $user_id,
@@ -48,6 +51,8 @@ sub save {
 	my ($self) = @_;
 	my $dbh = Vector::DB::connect;
 
+	Vector::HtmlFilter::htmlfilter \$self->{data};
+
 	if ($self->{post_id}) {
 		$dbh->do('UPDATE posts set data, ts) VALUES (?,NOW())', undef, $self->{data})
 			or die $dbh->errstr;
@@ -113,7 +118,7 @@ sub content {
 			$r .= "\n";
 		}
 	}
-	$r .= Vector::Util::linebreak(xmlescape $self->{data});
+	$r .= Vector::Util::linebreak($self->{data});
 
 	return $r;
 }