From eae0b74ddd688507209e85b54ca3a4e67a7f5aed Mon Sep 17 00:00:00 2001 From: Thomas Jansen Date: Mon, 2 Nov 2009 00:49:11 +0100 Subject: [PATCH] lyricwiki: convert numeric HTML escape sequences to proper characters I've stumbled across several cases of obfuscated lyrics that use the numeric HTML escape sequences. --- lyrics/02-lyricwiki.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lyrics/02-lyricwiki.rb b/lyrics/02-lyricwiki.rb index b3b7028..db7b970 100755 --- a/lyrics/02-lyricwiki.rb +++ b/lyrics/02-lyricwiki.rb @@ -23,6 +23,7 @@ require 'uri' require 'net/http' +require 'cgi' url = "http://lyrics.wikia.com/api.php?action=lyrics&fmt=xml&func=getSong" + \ "&artist=#{URI.escape(ARGV[0])}&song=#{URI.escape(ARGV[1])}" @@ -47,4 +48,4 @@ if not $1 =~ /^.*<\/div>(.*?)$/im exit(1) end -puts $1.gsub(/<br \/>/, "\n") +puts CGI::unescapeHTML($1.gsub(/<br \/>/, "\n")) -- 2.30.2