Trivial Thoughts
Thoughts and discussion on programming projects using the Python language.



Subscribe to "Trivial Thoughts" in Radio UserLand.

Click to see the XML version of this web page.

Click here to send an email to the editor of this weblog.
 

 

iwannamassage.py v0.5

# Fetch and parse an HTML page into a DOM-like tree of elements.
# Uses my new favorite XML parser, ElementTree by the effbot.
# Rather than use ElementTree's interface to the Tidy command
# (which runs Tidy in a subshell), I'm using M.-A. Lemburg's version
# that turns Tidy into a Python extension.
from __future__ import generators
import sys, os, types, tempfile
from urllib2 import urlopen, URLError
from mx.Tidy import Tidy
from mx import DateTime
# I'm using ElementTree 1.2a, which supports limited XPath
from elementtree import ElementTree, HTMLTreeBuilder
from fileadaptor import FileAdaptor
from toolbox import enumerate
# This is a necessary hack, as the web page displays no year in its dates.
# ApptSched.slotToDateTime() inserts this string between the page's date
# label and time label before handing the result to the DateTime parser.
# NOTE(review): the value is hard-coded, so it has to be edited by hand
# whenever the schedule page rolls over to a different year.
currYear = "2003"
class TidyError(Exception):
    """Raised by TidyFilter when Tidy reports errors in its input."""
class TidyFilter:
    """Read-only, file-like view of an HTML source after Tidy has converted
    it to XHTML.

    The conversion happens once, in the constructor; the result is kept in a
    temporary file and handed out through read()."""

    def __init__(self, src):
        """Run Tidy over 'src' and leave the XHTML output ready for reading.

        src -- a true file object, or any other data source that FileAdaptor
               can turn into one.

        Raises TidyError (carrying the error data returned by Tidy) if Tidy
        reports one or more errors."""
        self.src = src

        # mx.Tidy.tidy must have a true file object for input and output.
        # If the data source is not a true file object, use FileAdaptor to
        # turn it into one.
        if isinstance(self.src, types.FileType):
            self.inFileObj = self.src
        else:
            self.inFileObj = FileAdaptor(self.src).file()

        # Set up a temp file for output.
        self.outFileObj = os.tmpfile()

        # Use mx.Tidy.tidy to convert our input HTML into clean output XHTML.
        # The call is wrapped in parentheses so that it can span two lines;
        # a bare line break after '=' is a syntax error.
        nerrors, nwarnings, outputdata, errordata = Tidy.tidy(
            self.inFileObj, self.outFileObj, output_xhtml=1)

        # Raise an exception if Tidy got an error.  The temp file is of no
        # further use in that case, so release it first.
        if nerrors:
            self.outFileObj.close()
            # NOTE(review): errordata is assumed to be Tidy's error report
            # as text -- confirm against the mx.Tidy documentation.
            raise TidyError(errordata)

        # Make sure the temp output file is ready for input.
        self.outFileObj.flush()
        self.outFileObj.seek(0, 0)

    def read(self, size=None):
        """Return up to 'size' bytes of the tidied XHTML, or everything that
        is left when 'size' is omitted or None."""
        # Python 2 file objects do not accept None as a size, so the
        # "read it all" case has to call read() with no argument.
        if size is None:
            return self.outFileObj.read()
        return self.outFileObj.read(size)

class ApptSched:
    """Initialize an instance of this class with the object returned by
    ElementTree. It will extract the schedule into a searchable form.  It
    provides methods for getting the next available appointment slot.

    Attributes set by the constructor:
      skipCols -- indexes of the header-row columns that carry no date label
      dates    -- text of the date labels, in column order
      times    -- text of the first cell of every row after the header row
      slots    -- list of rows; each row is a list holding the 'class'
                  attribute (or None) of every cell not listed in skipCols"""

    def __init__(self, treeObj):
        """Locate the schedule table in 'treeObj' and extract its contents.

        Raises IndexError if the tree contains no 'table' element or the
        table has no rows."""
        self.skipCols = []
        self.dates = []

        # First, do the very data-specific stuff needed to find the
        # appointment schedule table element.  This was determined empirically.
        # ElementTree 1.2a supports a limited form of XPath, which is a way of
        # addressing a particular element in an XML file.  Here, we are getting
        # the element that contains the body of the table that contains the
        # appointment schedule, which just happens to be the last 'table'
        # element on the web page.
        elem = treeObj.findall(".//table")[-1]

        # Get a list of all the rows in the table.  Elements are sequences of
        # their children, so list() does the same job as getchildren().
        rows = list(elem)

        # Row 0 contains date labels, and some blank columns that will
        # need to be skipped.  Extract that info.
        self.getDatesFromRow(rows[0])
        dataRows = rows[1:]

        # Column 0 contains the time labels.
        self.times = [row[0].text for row in dataRows]

        # Now pull out the appointment slots availability info.
        # Each slot element has a 'class' attribute, that can be one of
        # "taken", "avail" or "na" (meaning 'Not Available').
        # Here, we generate a two-dimensional array (actually, a list of
        # lists) to represent the appointment schedule slots.  Each element in
        # the array contains the value of the 'class' attribute, or None for
        # a cell that has no such attribute.
        self.slots = [[col.get("class")
                       for colIndex, col in enumerate(row)
                       if colIndex not in self.skipCols]
                      for row in dataRows]

    def getDatesFromRow(self, rowElem):
        """Extract two lists from the row containing both blank column labels
        and date column labels.  The indexes of all the blank column labels
        are appended to self.skipCols, and the date labels are appended to
        self.dates.

        A column counts as blank when its element has no children; otherwise
        the text of its first child is taken as the date label."""
        for index, colElem in enumerate(rowElem):
            if len(colElem) > 0:
                self.dates.append(colElem[0].text)
            else:
                self.skipCols.append(index)

    def slotToDateTime(self, timeIndex, dateIndex):
        """Given the indexes into a two-dimension array that identifies a
        slot, this method converts the date and time for that slot into a form
        that can be used to instantiate an mx.DateTime object, and returns
        that object.

        The date label is expected to contain a comma; only the part after
        the first comma is used, followed by the module-level currYear and
        the time label.  Raises IndexError if the label has no comma."""
        timeRaw = self.times[timeIndex]
        dateRaw = self.dates[dateIndex]
        datePart = dateRaw.split(",")[1].strip()
        fullDateTimeString = " ".join([datePart, currYear, timeRaw])
        return DateTime.Parser.DateTimeFromString(fullDateTimeString)

    def getNextAvail(self):
        """This method is a generator which yields every available
        appointment slot, as an mx.DateTime object.

        Slots are visited in the order they are stored in self.slots: row by
        row, and within a row from the first date column to the last."""
        for timeIndex, timeSlots in enumerate(self.slots):
            for dateIndex, dateSlot in enumerate(timeSlots):
                # A cell without a 'class' attribute is stored as None and
                # is simply not available.
                if dateSlot and dateSlot.startswith("avail"):
                    yield self.slotToDateTime(timeIndex, dateIndex)

class MassageAppt:
    """This class will extract the appointment schedule table from the web
    page, and provides methods for getting the next available appointment
    slot, and making an appointment for that slot.

    Attributes:
      excludes  -- list of (startDT, endDT) tuples; slots falling inside any
                   of these ranges (boundaries included) are never offered
      apptSched -- the ApptSched instance built from the fetched page"""

    def __init__(self, url):
        """Load the conflicts file, then fetch and parse the schedule page
        found at 'url'.

        An error opening the URL (URLError or OSError) is reported on
        standard output and then re-raised."""
        # Initialize an empty list of tuples of DateTime pairs used to exclude
        # date/time ranges when finding available appointment slots.
        self.excludes = []
        self.getConflicts()

        # Get an ElementTree HTML parser.
        htmlParserObj = HTMLTreeBuilder.TreeBuilder()

        # Open the URL, getting a file-like URL object.
        try:
            urlObj = urlopen(url)
        except (URLError, OSError):
            print("Error opening URL '%s'" % url)
            raise

        # Tell ElementTree to parse our input XHTML source, using an
        # HTML parser.  This will give us a top-level ElementTree object.
        # The URL object is closed whether or not parsing succeeds.
        try:
            treeObj = ElementTree.parse(TidyFilter(urlObj),
                                        parser=htmlParserObj)
        finally:
            urlObj.close()

        # Use an instance of the ApptSched class to extract the appointment
        # schedule data from the tree of elements.
        self.apptSched = ApptSched(treeObj)

    def findSlot(self):
        """This method will find the next available appointment slot which
        does not fall within excluded times.  These excluded times are
        specified by a list of tuples of start and end times, given as
        mx.DateTime objects.

        Returns the first slot produced by self.apptSched.getNextAvail() that
        lies outside every excluded range, or None if there is no such
        slot."""
        for slotDT in self.apptSched.getNextAvail():
            if not self._isExcluded(slotDT):
                return slotDT
        return None

    def _isExcluded(self, slotDT):
        """Tell whether 'slotDT' falls within any excluded date/time range,
        with both ends of each range counted as part of the range."""
        for startDT, endDT in self.excludes:
            if startDT <= slotDT and slotDT <= endDT:
                return True
        return False

    def addExcludeRange(self, startDT, endDT):
        """Add a tuple of DateTime objects that specify a date/time range to
        exclude when finding an available appointment slot."""
        self.excludes.append((startDT, endDT))

    def getConflicts(self):
        """Read the conflicts file and add one exclude range per line.

        Each non-blank line holds three comma-separated fields: a date, a
        start time and an end time.  If the file cannot be opened, a message
        is printed and no ranges are added.

        Raises IndexError if a non-blank line has fewer than three fields."""
        try:
            conflictsFileObj = open("/conflicts.txt", 'r')
        except IOError:
            print("Conflicts file not found")
            return

        try:
            for line in conflictsFileObj:
                # strip() removes the trailing newline along with any other
                # surrounding whitespace; the text itself is left untouched.
                cleanLine = line.strip()
                if not cleanLine:
                    continue
                splitList = cleanLine.split(',')
                date = splitList[0]
                startTime = splitList[1]
                endTime = splitList[2]
                startDT = DateTime.Parser.DateTimeFromString(
                    " ".join([date, startTime]))
                endDT = DateTime.Parser.DateTimeFromString(
                    " ".join([date, endTime]))
                self.addExcludeRange(startDT, endDT)
        finally:
            conflictsFileObj.close()
   
if __name__ == "__main__":
    # Command-line entry point.  The URL of the schedule page is the one
    # and only argument; without it there is nothing to fetch.
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s URL\n" % sys.argv[0])
        sys.exit(2)
    app = MassageAppt(sys.argv[1])
    # Prints whatever findSlot() returns, which may be None.
    print(app.findSlot())
    sys.exit(0)


Click here to visit the Radio UserLand website. © Copyright 2003 Michael Kent.
Last update: 7/17/2003; 9:55:44 PM.
This theme is based on the SoundWaves (blue) Manila theme.