This page contains automated test results for code from O'Reilly's Ruby Cookbook. If this code looks interesting or useful, you might want to buy the whole book.
| Parsing Invalid Markup | | |
|---|---|---|
| Code | Expected | Actual |
require 'rubygems'
require 'rubyful_soup'
invalid_html = 'A lot of <b class=1>tags are <i class=2>never closed.'
soup = BeautifulSoup.new(invalid_html)
puts soup.prettify |
A lot of <b class="1">tags are <i class="2">never closed. </i> </b> |
A lot of <b class="1">tags are <i class="2">never closed. </i> </b> |
soup.b.i |
<i class="2">never closed.</i> | <i class="2">never closed.</i> |
soup.i |
<i class="2">never closed.</i> | <i class="2">never closed.</i> |
soup.find(nil, :attrs=>{'class' => '2'}) |
<i class="2">never closed.</i> | <i class="2">never closed.</i> |
soup.find_all('i') |
[<i class="2">never closed.</i>] | [<i class="2">never closed.</i>] |
soup.b['class'] |
"1" | "1" |
soup.find_text(/closed/) |
"never closed." | "never closed." |
require 'rubygems'
require 'html/sgml-parser'
require 'set'

html = %{<a name="anchor"><a href="http://www.oreilly.com">O'Reilly</a>
<b>irrelevant</b><a href="http://www.ruby-lang.org/">Ruby</a>}

class LinkGrabber < HTML::SGMLParser
  attr_reader :urls

  def initialize
    @urls = Set.new
    super
  end

  # Called once for every <a> tag; attrs arrives as [name, value] pairs.
  def do_a(attrs)
    url = attrs.find { |attr| attr[0] == 'href' }
    @urls << url[1] if url
  end
end

extractor = LinkGrabber.new
extractor.feed(html)
extractor.urls |
#<Set: {"http://www.ruby-lang.org/", "http://www.oreilly.com"}> | #<Set: {"http://www.ruby-lang.org/", "http://www.oreilly.com"}> |
require 'rubyful_soup'
urls = Set.new
BeautifulStoneSoup.new(html).find_all('a').each do |tag|
  urls << tag['href'] if tag['href']
end
puts BeautifulStoneSoup.new(html, :parse_only_these => 'a') |
<a name="anchor"></a> <a href="http://www.oreilly.com">O'Reilly</a> <a href="http://www.ruby-lang.org/">Ruby</a> |
<a name="anchor"></a><a href="http://www.oreilly.com">O'Reilly</a><a href="http://www.ruby-lang.org/">Ruby</a> |
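Taken together, the examples above suggest that the Set-collecting loop also copes with the kind of invalid markup tested at the top of this page, since rubyful_soup closes dangling tags on its own. The sketch below is an illustration rather than part of the automated test run: it uses only calls exercised above (`BeautifulSoup.new`, `find_all('a')`, `tag['href']`), and the `broken_html` input and the noted result are assumptions.

    require 'rubygems'
    require 'rubyful_soup'
    require 'set'

    # Illustrative input (not from the test run): two anchors that are never closed.
    broken_html = %{<a href="http://www.oreilly.com">O'Reilly <a href="http://www.ruby-lang.org/">Ruby}

    # The lenient parser still finds both <a> tags, so the hrefs can be
    # collected exactly as in the SGMLParser example.
    links = Set.new
    BeautifulSoup.new(broken_html).find_all('a').each do |tag|
      links << tag['href'] if tag['href']
    end
    links  # expected to contain both URLs

As in the SGMLParser version, collecting into a Set keeps only distinct URLs, so repeated links in the source markup appear once in the result.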