Working With Fixed Record Length CSV Files
Yesterday I wrote about working with fixed record length files, and presented a class for making it easy. Today I'll extend that idea to handle records containing csv data.
The new CSVRecordFile class inherits from the RecordFile class, overriding the read and write methods to add csv parsing and formatting. This allows you to read and write fixed length csv data records in random order. The StringCSVAdaptor class (presented earlier) is used to enable us to use Python 2.3's csv module with strings. This is necessary since the csv module's reader and writer functions expect to work with iterables, such as a file-like object or a sequence.
Matt Goodall took me up on the need for the StringCSVAdaptor class, rightly pointing out a simpler way of handling the problem (thanks, Matt! I think you are reader #4 of this weblog, and the first person to leave me a comment!) Sadly, Matt's suggestion does not fit with the problem domain I'm using CSVRecordFile for. For one thing, I need both a csv reader and writer. I only want to create these objects once per CSVRecordFile instance, and then use them to parse/format many records in random (not sequential) order. Matt's solution, while useful for simple one-shot csv needs, looks to me to require the creation of the reader and writer for each record that is to be parsed (because he wraps the string to be parsed in a list to make it an iterable). You can read Matt's comments from yesterday's post.
# Put this code in a file called csvrecfile.py:

"""This file contains the CSVRecordFile class, for working with fixed
length record files, where the records contain csv data."""

__author__ = "Mike Kent"
__version__ = "$Id$".split()[-2:][0]

import recfile
import csv
import csvadaptor


class CSVRecordFileFmtError(Exception):
    """Raised when the csv module rejects a record being read or written."""


class CSVRecordFile(recfile.RecordFile):
    """A fixed-length record file whose records hold csv data.

    The file is laid out as fixed-length records containing csv data,
    where each record is padded to its proper length with a padding
    character, and may be optionally terminated with a record terminator
    string.
    """

    def __init__(self, filename, mode, reclen, recpad='', recterm=None,
                 **csvKwParams):
        """Open the record file and set up one csv reader and one csv writer.

        filename, mode, reclen, recpad and recterm are handed unchanged to
        recfile.RecordFile.  Any remaining keyword arguments are passed to
        both csv.reader and csv.writer.
        """
        recfile.RecordFile.__init__(self, filename, mode, reclen, recpad,
                                    recterm)
        # A single adaptor is shared by the reader and the writer, so both
        # are created once per instance and reused for every record.
        adaptor = csvadaptor.StringCSVAdaptor()
        self.csvAdaptor = adaptor
        self.csvReader = csv.reader(adaptor, **csvKwParams)
        self.csvWriter = csv.writer(adaptor, **csvKwParams)

    def read(self, recNum):
        """Read the csv record numbered recNum and return a list of strings.

        Record numbers start at 1.  An empty list will be returned on end
        of file.  Raises CSVRecordFileFmtError if the csv module cannot
        parse the record.
        """
        rawRecord = recfile.RecordFile.read(self, recNum)
        # The csv reader pulls its input from the adaptor's data attribute.
        self.csvAdaptor.data = rawRecord
        try:
            return self.csvReader.next()
        except csv.Error:
            raise CSVRecordFileFmtError

    def write(self, recNum, valueList):
        """Write a list of mixed-type values, in csv format, to record recNum.

        Record numbers start with 1.  The record will be padded to the
        correct length using the padding character, and optionally
        terminated by the record terminator string.  You can seek to, and
        write, records beyond EOF.  However, to append a new record to the
        current actual EOF, give a record number of 0.

        Returns the actual record number written to.  Raises
        CSVRecordFileFmtError if the csv module cannot format the values.
        """
        try:
            self.csvWriter.writerow(valueList)
        except csv.Error:
            raise CSVRecordFileFmtError
        # The csv writer leaves its formatted output in the adaptor.
        formatted = self.csvAdaptor.data
        return recfile.RecordFile.write(self, recNum, formatted)
# Here are the unit tests. Put this code in a file called test_csvrecfile.py:

#! /usr/bin/env python

import sys
import unittest
import csv

import csvrecfile

# Record layout shared by every test case.
TEST_FILENAME = "test.txt"
RECLEN = 20
RECPAD = ' '
# NOTE(review): the terminator is reproduced exactly as published; it may
# originally have been '\r\n' -- confirm.  Either way it is two characters
# long, which is all the record-length arithmetic below relies on.
RECTERM = 'r\n'


def _openRecordFile(mode):
    """Open the shared test file as a CSVRecordFile with the test layout."""
    return csvrecfile.CSVRecordFile(TEST_FILENAME, mode, RECLEN, RECPAD,
                                    RECTERM, lineterminator='',
                                    quoting=csv.QUOTE_NONNUMERIC)


def _expectedRecord(csvText):
    """Return csvText as it should appear on disk: padded, then terminated.

    The on-disk record is RECLEN characters long including the terminator,
    which is what the raw read(RECLEN) in these tests compares against.
    """
    padCount = RECLEN - len(RECTERM) - len(csvText)
    return csvText + RECPAD * padCount + RECTERM


def _readRawRecord(recNum):
    """Read record recNum (1-based) straight from the file, bypassing csv."""
    testFileObj = open(TEST_FILENAME, "rb")
    try:
        testFileObj.seek((recNum - 1) * RECLEN)
        return testFileObj.read(RECLEN)
    finally:
        testFileObj.close()


def _writeInitialRecords():
    """Create the test file holding records 1..5, each 'Record <n>'."""
    recFileObj = _openRecordFile("w+b")
    try:
        for recNum in range(1, 6):
            recFileObj.write(recNum, [1, 2, "Record %d" % recNum])
    finally:
        recFileObj.close()


class TestCases_01_RecordFile(unittest.TestCase):
    def test_01_instantiate(self):
        recFileObj = _openRecordFile("w+b")
        try:
            self.assert_(recFileObj is not None)
        finally:
            recFileObj.close()


class TestCases_02_RecordFileWriteAdd(unittest.TestCase):
    def test_01_writeAddOne(self):
        recFileObj = _openRecordFile("w+b")
        try:
            self.assert_(recFileObj is not None)
            # Record number 0 appends at the current end of file.
            recFileObj.write(0, [1, 2, "three"])
            recFileObj.flush()
            self.assertEqual(_readRawRecord(1), _expectedRecord('1,2,"three"'))
        finally:
            recFileObj.close()

    def test_02_writeAddSeveral(self):
        recFileObj = _openRecordFile("w+b")
        try:
            self.assert_(recFileObj is not None)
            for recNum in range(1, 6):
                recFileObj.write(0, [1, 2, "Record %d" % recNum])
            recFileObj.flush()
            for recNum in range(1, 6):
                expected = _expectedRecord('1,2,"Record %d"' % recNum)
                self.assertEqual(_readRawRecord(recNum), expected)
        finally:
            recFileObj.close()


class TestCases_03_RecordFileWriteRandom(unittest.TestCase):
    def setUp(self):
        _writeInitialRecords()

    def test_01_writeRandomOne(self):
        recFileObj = _openRecordFile("r+b")
        try:
            self.assert_(recFileObj is not None)
            recNum = 2
            recFileObj.write(recNum, [3, 4, "New record %d" % recNum])
            recFileObj.flush()
            expected = _expectedRecord('3,4,"New record %d"' % recNum)
            self.assertEqual(_readRawRecord(recNum), expected)
        finally:
            recFileObj.close()

    def test_02_writeRandomSeveral(self):
        recFileObj = _openRecordFile("r+b")
        try:
            self.assert_(recFileObj is not None)
            recNumList = [1, 5, 3, 2, 4]
            for recNum in recNumList:
                recFileObj.write(recNum, [1, 2, "New record %d" % recNum])
            recFileObj.flush()
            for recNum in recNumList:
                expected = _expectedRecord('1,2,"New record %d"' % recNum)
                self.assertEqual(_readRawRecord(recNum), expected)
        finally:
            recFileObj.close()


class TestCases_04_RecordFileReadRandom(unittest.TestCase):
    def setUp(self):
        _writeInitialRecords()

    def test_01_readRandomOne(self):
        recFileObj = _openRecordFile("r+b")
        try:
            rec = recFileObj.read(3)
            self.assertEqual(rec, ["1", "2", "Record 3"])
        finally:
            recFileObj.close()

    def test_02_readRandomSeveral(self):
        recFileObj = _openRecordFile("r+b")
        try:
            # Repeated and out-of-order record numbers exercise random access.
            for recNum in [1, 5, 3, 1, 2, 2, 4]:
                rec = recFileObj.read(recNum)
                self.assertEqual(rec, ["1", "2", "Record %d" % recNum])
        finally:
            recFileObj.close()


if __name__ == "__main__":
    unittest.main()
    sys.exit(0)
1:04:57 PM
|
|