# -*- coding: utf-8 -*- """Unit tests for Beautiful Soup. These tests make sure the Beautiful Soup works as it should. If you find a bug in Beautiful Soup, the best way to express it is as a test case like this that fails.""" import unittest from BeautifulSoup import * class SoupTest(unittest.TestCase): def assertSoupEquals(self, toParse, rep=None, c=BeautifulSoup): """Parse the given text and make sure its string rep is the other given text.""" if rep == None: rep = toParse self.assertEqual(str(c(toParse)), rep) class FollowThatTag(SoupTest): "Tests the various ways of fetching tags from a soup." def setUp(self): ml = """ 1 2 3 4 4""" self.soup = BeautifulStoneSoup(ml) def testFindAllByName(self): matching = self.soup('a') self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') self.assertEqual(matching, self.soup.findAll('a')) self.assertEqual(matching, self.soup.findAll(SoupStrainer('a'))) def testFindAllByAttribute(self): matching = self.soup.findAll(id='x') self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') self.assertEqual(matching[1].name, 'b') matching2 = self.soup.findAll(attrs={'id' : 'x'}) self.assertEqual(matching, matching2) strainer = SoupStrainer(attrs={'id' : 'x'}) self.assertEqual(matching, self.soup.findAll(strainer)) self.assertEqual(len(self.soup.findAll(id=None)), 1) self.assertEqual(len(self.soup.findAll(width=100)), 1) self.assertEqual(len(self.soup.findAll(junk=None)), 5) self.assertEqual(len(self.soup.findAll(junk=[1, None])), 5) self.assertEqual(len(self.soup.findAll(junk=re.compile('.*'))), 0) self.assertEqual(len(self.soup.findAll(junk=True)), 0) self.assertEqual(len(self.soup.findAll(junk=True)), 0) self.assertEqual(len(self.soup.findAll(href=True)), 1) def testFindallByClass(self): soup = BeautifulSoup('FooBar') self.assertEqual(soup.find(attrs='foo').string, "Foo") self.assertEqual(soup.find('a', '1').string, "Bar") self.assertEqual(soup.find('a', '23').string, "Bar") self.assertEqual(soup.find('a', '4').string, "Bar") self.assertEqual(soup.find('a', '2'), None) def testFindAllByList(self): matching = self.soup(['a', 'ac']) self.assertEqual(len(matching), 3) def testFindAllByHash(self): matching = self.soup({'a' : True, 'b' : True}) self.assertEqual(len(matching), 4) def testFindAllText(self): soup = BeautifulSoup("\xbb") self.assertEqual(soup.findAll(text=re.compile('.*')), [u'\xbb']) def testFindAllByRE(self): import re r = re.compile('a.*') self.assertEqual(len(self.soup(r)), 3) def testFindAllByMethod(self): def matchTagWhereIDMatchesName(tag): return tag.name == tag.get('id') matching = self.soup.findAll(matchTagWhereIDMatchesName) self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') def testFindByIndex(self): """For when you have the tag and you want to know where it is.""" tag = self.soup.find('a', id="a") self.assertEqual(self.soup.index(tag), 3) # It works for NavigableStrings as well. s = tag.string self.assertEqual(tag.index(s), 0) # If the tag isn't present, a ValueError is raised. soup2 = BeautifulSoup("") tag2 = soup2.find('b') self.assertRaises(ValueError, self.soup.index, tag2) def testParents(self): soup = BeautifulSoup('