iwannamassage.py v0.2
# Fetch and parse an HTML page into a DOM-like tree of elements.
# Uses my new favorite XML parser, ElementTree by the effbot.
# Rather than use ElementTree's interface to the Tidy command
# (which runs Tidy in a subshell), I'm using M.-A. Lemburg's version
# that turns Tidy into a Python extension.

import sys
import os
import types
import tempfile
from urllib2 import urlopen, URLError

from mx.Tidy import Tidy
from elementtree import ElementTree, HTMLTreeBuilder

from fileadaptor import FileAdaptor


class TidyError(Exception):
    """Raised by TidyFilter when Tidy reports one or more errors."""
    pass


class TidyFilter:
    """File-like wrapper that yields the Tidy-cleaned XHTML of an HTML source.

    The whole conversion happens in the constructor: the input is run
    through mx.Tidy into an anonymous temp file, which is then rewound so
    that read() hands back the cleaned XHTML from the beginning.
    """

    def __init__(self, src):
        """Run Tidy over src and get the result ready for reading.

        src -- a true file object, or any other data source that
               FileAdaptor can turn into one.

        Raises TidyError if Tidy reports a non-zero error count.
        """
        self.src = src

        # mx.tidy.Tidy must have a true file object for input and output.
        # If the data source is not a true file object, use FileAdaptor
        # to turn it into one.
        if not isinstance(self.src, types.FileType):
            self.inFileObj = FileAdaptor(self.src).file()
        else:
            self.inFileObj = self.src

        # Set up a temp file for output.
        self.outFileObj = os.tmpfile()

        # Use mx.Tidy.tidy to convert our input HTML into clean
        # output XHTML.
        nerrors, nwarnings, outputdata, errordata = Tidy.tidy(
            self.inFileObj, self.outFileObj, output_xhtml=1)

        # Raise an exception if Tidy got an error.  Release the temp file
        # first, since the caller never gets an object to close, and pass
        # along whatever Tidy returned as error data so the failure can be
        # diagnosed (presumably its diagnostic text, since no separate
        # errors file is supplied -- confirm against the mx.Tidy docs).
        if nerrors:
            self.outFileObj.close()
            raise TidyError("Tidy reported %d error(s): %s"
                            % (nerrors, errordata))

        # Make sure the temp output file is ready for input.
        self.outFileObj.flush()
        self.outFileObj.seek(0, 0)

    def read(self, size=None):
        """Return up to size bytes of cleaned XHTML, or all of it if size is None."""
        # A Python 2 file object's read() does not accept None, so the
        # default must not be forwarded as-is.
        if size is None:
            return self.outFileObj.read()
        return self.outFileObj.read(size)

    def close(self):
        """Close the temp output file; further read() calls will then fail."""
        self.outFileObj.close()


def main(url):
    """Fetch url, clean it with Tidy, parse it and dump the element tree.

    Prints a message and re-raises URLError if the URL cannot be opened;
    TidyError propagates if Tidy reports errors in the page.
    """
    # Get an ElementTree HTML parser.
    htmlParserObj = HTMLTreeBuilder.TreeBuilder()

    # Open the URL, getting a file-like URL object.
    try:
        urlObj = urlopen(url)
    except URLError:
        print("Error opening URL '%s'" % url)
        raise

    # Nested try/finally (rather than 'with') keeps this runnable on the
    # older Python 2 releases this script was written for.
    try:
        tidyObj = TidyFilter(urlObj)
        try:
            # Tell ElementTree to parse our input XHTML source, using an
            # HTML parser.  This will give us a top-level ElementTree object.
            treeObj = ElementTree.parse(tidyObj, parser=htmlParserObj)
        finally:
            tidyObj.close()
    finally:
        urlObj.close()

    # For now, just dump out the tree so we can see it.
    ElementTree.dump(treeObj)


if __name__ == "__main__":
    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s URL\n" % sys.argv[0])
        sys.exit(2)
    main(sys.argv[1])
|
© Copyright
2003
Michael Kent.
Last update:
6/26/2003; 12:14:26 PM.
This theme is based on the SoundWaves
(blue) Manila theme. |
|