#csv2XML.py - version 0.2 - 13 July 2001
"""
*****
This script has been superseded by LL2XML.
see http://www.outwardlynormal.com/python/ll2XML.htm (that's two lower-case letter Ls, not "one one")
*****
The function csv2XML in this module accepts up to four arguments:
1) a "well formed" csv string (required)
2) a tuple of heading strings (optional)
3) a root element name string (optional)
4) a row element name string (optional)
Where no header tuple is supplied the first row of the csv string
is assumed to be a header row, and its values used to create the element names
for the XML rendition of the other rows. (Headings are normalised by the script
to lower case and have any spaces replaced by underscores, so you
shouldn't need to do anything to fix the headings yourself.)
*Using the root_element and row_element arguments:
Example: If your csv is a list of employees, you may want to use "employees" as
the root element name, and "employee" as the row level element name.
Note that "<", ">" and "</" should not be used to wrap these names. Just define them in plain text.
If these two arguments are not defined in the function call, the defaults, "rows" and "row", are used.
See the examples in the test function below for more detail.
The script handles escaped commas correctly: "," in the csv becomes a real comma in the XML.
Certain characters that have meaning in XML are replaced by standard XML entities in the output:
& becomes &amp;
' becomes &apos;
< becomes &lt;
> becomes &gt;
" becomes &quot;
To use csv2XML in another module:
import csv2XML
csv = *whatever your csv is*
xml = csv2XML.csv2XML(csv)
print xml
To test:
import csv2XML
csv2XML.test()
See the test function below for more on how to use the script.
Please send comments and suggestions to jwelby**replace_this@**nospam**outwardlynormal.com
(Remove the tag to make the e-mail address valid.)
Thanks to Dave Cole for the optimisation (and other) suggestions.
"""
import string
def getData(csv):
    """Split csv text into rows of XML-escaped field strings.

    The text is split into rows on "\\n" and each row into fields on ",".
    The three-character sequence '","' is treated as an escaped comma: it
    does not separate fields and comes back as a literal "," inside the
    field. Every row is passed through escape() before it is split, so the
    returned fields are safe to use as XML text.

    Returns a list with one entry per csv row; each entry is the list of
    field strings of that row.
    """
    # A NUL stands in for escaped commas while the row is split: it contains
    # no comma and escape() leaves it untouched. This assumes the csv text
    # itself never contains a NUL character.
    placeholder = "\0"
    rows = []
    for line in csv.split("\n"):
        # Protect escaped commas first; escape() would otherwise rewrite
        # the surrounding double quotes and hide the sequence.
        line = escape(line.replace('","', placeholder))
        rows.append([field.replace(placeholder, ",")
                     for field in line.split(",")])
    return rows
def escape(s):
    """Replace special characters '&', "'", '<', '>' and '"' by XML entities.

    '&' is replaced first so that the ampersands introduced by the other
    entities are not escaped a second time.

    Returns the escaped string.
    """
    replacements = (
        ("&", "&amp;"),  # must stay first, see docstring
        ("'", "&apos;"),
        ("<", "&lt;"),
        (">", "&gt;"),
        ('"', "&quot;"),
    )
    for char, entity in replacements:
        s = s.replace(char, entity)
    return s
def cleanTag(s):
    """Normalise s for use as an element name.

    Returns s converted to lower case with every space replaced by an
    underscore. No other characters are changed or checked.
    """
    # str methods replace the string-module functions, which no longer
    # exist in Python 3.
    return s.lower().replace(" ", "_")
def csv2XML(csv_text, headings_tuple=(), root_element="rows", row_element="row"):
    """Convert csv text to an XML string.

    csv_text       -- the csv data, one row per line
    headings_tuple -- heading strings used to name the elements of each row;
                      when empty (the default) the first csv row supplies
                      the headings and is not output as data
    root_element   -- name of the element wrapping the whole document
    row_element    -- name of the element wrapping each row

    Headings and both element names are normalised with cleanTag().

    Returns the XML text as a single string.

    Raises ValueError if the number of headings differs from the number of
    fields in the first csv row, or if any heading is empty. A later row
    holding more fields than there are headings raises IndexError.
    """
    root_element = cleanTag(root_element)
    row_element = cleanTag(row_element)
    data = getData(csv_text)

    # Select the data rows by position. Comparing each row with data[0]
    # dropped the first data row whenever headings were supplied, and also
    # dropped any later row that merely equalled the first one.
    if not headings_tuple:
        headings = data[0]
        rows = data[1:]
    else:
        headings = headings_tuple
        rows = data

    # Check headings
    csv_column_num = len(data[0])
    heading_num = len(headings)
    if heading_num != csv_column_num:
        raise ValueError(" ".join([
            "Number of headings =", str(heading_num) + "\n",
            "Number of data 'columns' in csv =", str(csv_column_num) + "\n",
            "These numbers must be equal."]))
    if "" in headings:
        raise ValueError(
            "There is at least one empty heading item.\n"
            "Please amend csv or supply all non-empty headings "
            "in your headings tuple.")

    # The tag names are the same for every row, so clean them once.
    tags = [cleanTag(heading) for heading in headings]

    # Do the conversion, collecting the pieces and joining them once.
    parts = ["\n<" + root_element + ">"]
    for row in rows:
        parts.append("\n <" + row_element + ">\n")
        for i, item in enumerate(row):
            tag = tags[i]
            # Values are used exactly as getData() returned them. The old
            # replace of "" by "," put a comma around every character.
            parts.append(" <" + tag + ">" + item + "</" + tag + ">\n")
        parts.append(" </" + row_element + ">")
    parts.append("\n</" + root_element + ">")
    return "".join(parts)
# Here is a test function with some examples
def test():
    """Print three example conversions showing how csv2XML can be called.

    Example 1 uses all defaults, example 2 names the root and row elements,
    and example 3 supplies the headings separately from the csv data.
    """
    headings = ("Login", "First Name", "Last Name", "Job", "Group",
                "Office", "Permission")
    csv_no_heads = "\n".join([
        "auser,Arnold,Smith,Partner,Tax,London,read",
        "buser,Bill,Brown,Partner,Tax,New York,read",
        "cuser,Clive,Cutler,Partner,Management,Brussels,read",
        "duser,Denis,Davis,Developer,ISS,London,admin",
        "euser,Eric,Ericsson,Anylist,Analysis,London,admin",
        "fuser,Fabian,Fowles,Partner,IP,London,read",
    ])
    # The same data with the heading row in front of it.
    csv = ",".join(headings) + "\n" + csv_no_heads

    # print(...) with a single argument behaves the same on Python 2 and 3;
    # print("") stands in for the bare print statement (one blank line).

    # Example 1
    print("Example 1: Simple case, using defaults.\n")
    print(csv2XML(csv))
    print("")
    print("")

    # Example 2
    print("Example 2: csv has its headings in the first line,\nand we define our root and row element names.\n")
    print(csv2XML(csv, root_element="people", row_element="person"))
    # note that csv2XML(csv,(),"people","person") would be equivalent to the function call above.
    print("")
    print("")

    # Example 3
    print("Example 3: headings supplied using the headings argument (tuple),\nusing default root and row elemnt names.\n")
    print(csv2XML(csv_no_heads, headings))
# Run the examples only when this file is executed as a script, so that
# importing the module does not print the test output.
if __name__ == "__main__":
    test()