#csv2XML.py - version 0.2 - 13 July 2001
"""
*****
This script has been superseded by LL2XML.
see http://www.outwardlynormal.com/python/ll2XML.htm
(that's two lower-case letter Ls, not "one one")
*****

The function csv2XML in this module accepts up to four arguments:

1) a "well formed" csv string (required)
2) a tuple of heading strings (optional)
3) a root element name string (optional)
4) a row element name string (optional)

Where no header tuple is supplied the first row of the csv string is
assumed to be a header row, and its values used to create the element
names for the XML rendition of the other rows.

(Headings are normalised by the script to lower case and have any spaces
replaced by underscores, so you shouldn't need to do anything to fix the
headings yourself.)

*Using the root_element and row_element arguments:
Example: If your csv is a list of employees, you may want to use
"employees" as the root element name, and "employee" as the row level
element name.

Note that the characters &, ', <, > and " in the csv values are replaced
by the XML entities &amp;, &apos;, &lt;, &gt; and &quot; respectively.

To use csv2XML in another module:

    import csv2XML
    csv = *whatever your csv is*
    xml = csv2XML.csv2XML(csv)
    print(xml)

To test:

    import csv2XML
    csv2XML.test()

(Running this file directly as a script also runs test().)

See the test function below for more on how to use the script.

Please send comments and suggestions to
jwelby**replace_this@**nospam**outwardlynormal.com
(Remove the tag to make the e-mail address valid.)

Thanks to Dave Cole for the optimisation (and other) suggestions.
"""

# Aliased because getData() takes a parameter that is itself named "csv".
import csv as _csv_module
import string  # no longer used below; kept so the module namespace is unchanged


def getData(csv):
    """Parse csv text into a list of rows of XML-escaped field strings.

    Parsing is done by the standard library csv reader, so quoted fields
    (including fields that contain commas) are handled properly.  Blank
    lines are skipped.

    Args:
        csv: the complete csv document as one string.

    Returns:
        A list with one entry per non-blank csv row; each entry is a list
        of that row's field values after escape() has been applied.
    """
    rowsList = []
    # keepends=True lets the reader see the line terminators, which it
    # needs in order to keep a newline that sits inside a quoted field.
    for fields in _csv_module.reader(csv.splitlines(True)):
        if not fields:
            # The reader yields an empty list for a blank line.
            continue
        rowsList.append([escape(field) for field in fields])
    return rowsList


def escape(s):
    """Replace special characters '&', "'", '<', '>' and '"' by XML entities."""
    s = s.replace("&", "&amp;")  # Must be done first!
    s = s.replace("'", "&apos;")
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    s = s.replace('"', "&quot;")
    return s


def cleanTag(s):
    """Return s lower-cased with every space replaced by an underscore."""
    return s.lower().replace(" ", "_")


def csv2XML(csv_text, headings_tuple=(), root_element="rows", row_element="row"):
    """Convert csv text to an XML document string.

    Args:
        csv_text: the csv document as one string.
        headings_tuple: optional sequence of heading strings.  When it is
            empty the first csv row supplies the headings and is not
            emitted as data; otherwise every csv row is emitted as data.
        root_element: name of the document element (normalised by cleanTag).
        row_element: name of the per-row element (normalised by cleanTag).

    Returns:
        A string made of an XML declaration, the root element, and one row
        element per data row (indented two spaces).  Each row element holds
        one child element per field (indented four spaces), named after
        the matching heading (normalised by cleanTag).

    Raises:
        ValueError: if csv_text has no rows, if the number of headings
            differs from the number of columns in the first csv row, if
            any heading is empty, or if a row has more values than there
            are headings.
    """
    root_element = cleanTag(root_element)
    row_element = cleanTag(row_element)

    data = getData(csv_text)
    if not data:
        raise ValueError("The csv text contains no rows.")

    if headings_tuple:
        headings = list(headings_tuple)
        rows = data
    else:
        # The first row is the header.  It is removed by position, not by
        # value, so a later data row that happens to equal it is kept.
        headings = data[0]
        rows = data[1:]

    # check headings
    csv_column_num = len(data[0])
    heading_num = len(headings)
    if heading_num != csv_column_num:
        missmatch = ["Number of headings =", str(heading_num) + "\n",
                     "Number of data 'columns' in csv =", str(csv_column_num) + "\n",
                     "These numbers must be equal."]
        raise ValueError(" ".join(missmatch))

    for item in headings:
        if item == "":
            raise ValueError(
                "There is at least one empty heading item.\n"
                "Please amend csv or supply all non-empty headings "
                "in your headings tuple.")

    # The tag names are the same for every row, so normalise them once.
    tags = [cleanTag(heading) for heading in headings]

    # Do the conversion
    parts = ['<?xml version="1.0"?>\n<' + root_element + ">"]
    for row_number, row in enumerate(rows, 1):
        if len(row) > len(tags):
            raise ValueError(
                "Data row " + str(row_number) + " has " + str(len(row))
                + " values but there are only " + str(len(tags)) + " headings.")
        parts.append("\n  <" + row_element + ">\n")
        for tag, item in zip(tags, row):
            parts.append("    <" + tag + ">" + item + "</" + tag + ">\n")
        parts.append("  </" + row_element + ">")
    parts.append("\n</" + root_element + ">")
    return "".join(parts)


# Here is a test function with some examples
def test():
    """Print the XML produced for three example calls of csv2XML."""
    csv_with_heads = """Login,First Name,Last Name,Job,Group,Office,Permission
auser,Arnold,Smith,Partner,Tax,London,read
buser,Bill,Brown,Partner,Tax,New York,read
cuser,Clive,Cutler,Partner,Management,Brussels,read
duser,Denis,Davis,Developer,ISS,London,admin
euser,Eric,Ericsson,Anylist,Analysis,London,admin
fuser,Fabian,Fowles,Partner,IP,London,read"""

    csv_no_heads = """auser,Arnold,Smith,Partner,Tax,London,read
buser,Bill,Brown,Partner,Tax,New York,read
cuser,Clive,Cutler,Partner,Management,Brussels,read
duser,Denis,Davis,Developer,ISS,London,admin
euser,Eric,Ericsson,Anylist,Analysis,London,admin
fuser,Fabian,Fowles,Partner,IP,London,read"""

    # Example 1
    print("Example 1: Simple case, using defaults.\n")
    print(csv2XML(csv_with_heads))
    print("")
    print("")

    # Example 2
    print("Example 2: csv has its headings in the first line,\nand we define our root and row element names.\n")
    print(csv2XML(csv_with_heads, root_element="people", row_element="person"))
    # note that csv2XML(csv_with_heads, (), "people", "person") would be
    # equivalent to the function call above.
    print("")
    print("")

    # Example 3
    print("Example 3: headings supplied using the headings argument (tuple),\nusing default root and row elemnt names.\n")
    print(csv2XML(csv_no_heads, ("Login", "First Name", "Last Name", "Job", "Group", "Office", "Permission")))


if __name__ == "__main__":
    test()