import RSSHelper
import BeautifulSoup
import AsciiDammit
from PyRSS2Gen import RSSItem, Guid

class DoverFeed(RSSHelper.WebPageBasedRSSFeed):
    """RSS feed built by scraping book entries out of an HTML page.

    Every ``<table height="30%">`` in the page is treated as one book
    entry.  The attributes ``baseURL`` and ``lastBuildDate`` and the
    methods ``hasSeen`` and ``addRSSItem`` are used here but not defined
    here -- presumably they come from RSSHelper.WebPageBasedRSSFeed
    (confirm against RSSHelper).
    """

    def HTML2RSS(self, headers, body):
        """Scrape *body* and add an RSS item for each not-yet-seen book.

        headers -- accepted but not used by this method.
        body -- the HTML markup to parse.

        A table whose markup lacks the expected pieces (a link in the
        first row, a second cell containing a <br>, at least two <font>
        tags with a link in the next-to-last one) still raises; only the
        trailing-<br> cleanup is tolerant of unexpected content.
        """
        soup = BeautifulSoup.BeautifulSoup(body)
        for table in soup('table', {'height': '30%'}):
            tr = table.tr
            a = tr.a
            title = str(a.string)
            link = self.baseURL + a['href']

            # The author is whatever node follows the first <br> of the
            # second cell; drop a leading "by " when something follows it.
            td = tr('td')[1]
            author = td.br.next
            if len(author) > 3 and author[:3] == 'by ':
                author = author[3:]

            # The next-to-last <font> tag is used as the description; its
            # link is rewritten to the absolute URL computed above.
            description = td('font')[-2]
            description.a['href'] = link

            # Strip trailing <br> tags.  Text nodes have no ``name``
            # attribute, so a default is required to avoid an
            # AttributeError, and the emptiness check avoids an IndexError
            # when every child turns out to be a <br>.
            contents = description.contents
            while contents and getattr(contents[-1], 'name', None) == 'br':
                del contents[-1]
            description = description.renderContents()

            guid = Guid(link)
            if not self.hasSeen(guid):
                rss = RSSItem(title=AsciiDammit.htmlDammit(title),
                              link=link,
                              author=AsciiDammit.htmlDammit(author),
                              guid=guid,
                              description=AsciiDammit.htmlDammit(description),
                              pubDate=self.lastBuildDate)
                self.addRSSItem(rss)

# Build the feed object and regenerate it.  The positional arguments are,
# in order: the feed class, what look like the page URL, the feed title
# and the feed description, and then two file names (by their extensions,
# a pickle file and the RSS output path -- confirm the exact meaning
# against RSSHelper.loadFeed).
feed = RSSHelper.loadFeed(
    DoverFeed,
    'http://store.doverpublications.com/new-releases.html',
    "Newly published Dover books",
    "Keep track of the latest books published by Dover",
    'dover.pickle',
    '/home/leonardr/public_html/automat/dover/dover.rss')
feed.refresh()

