root/trunk/dabo/lib/xmltodict.py

Revision 4893, 12.0 kB (checked in by ed, 2 weeks ago)

Added code to normalize all the variations for the names of 'utf-n' encodings.

  • Property svn:eol-style set to native
Line 
1 # -*- coding: utf-8 -*-
2 """ xmltodict(): convert xml into tree of Python dicts.
3
4 This was copied and modified from John Bair's recipe at aspn.activestate.com:
5     http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/149368
6 """
7 import os
8 import string
9 import locale
10 from xml.parsers import expat
11
12 # If we're in Dabo, get the default encoding.
13 import dabo
14 import dabo.lib.DesignerUtils as desUtil
15 from dabo.dLocalize import _
16 from dabo.lib.utils import resolvePath
app = dabo.dAppRef
if app:
    # A Dabo application object exists: use the encoding it is configured with.
    default_encoding = app.Encoding
else:
    # No application object: try the current locale, then the default
    # locale, and finally fall back to Dabo's own default encoding.
    default_encoding = locale.getlocale()[1]
    if default_encoding is None:
        default_encoding = locale.getdefaultlocale()[1]
    if default_encoding is None:
        default_encoding = dabo.defaultEncoding
# Normalize the names, as xml.sax running on Gtk will complain for some variations
deLow = default_encoding.lower()
default_encoding = {
    "utf8": "utf-8",
    "utf-8": "utf-8",
    "utf16": "utf-16",
    "utf-16": "utf-16",
}.get(deLow, default_encoding)

# Python seems to need to compile code with \n linesep:
code_linesep = "\n"
# Line separator used when writing XML text.
eol = os.linesep
36
37
class Xml2Obj:
    """Convert XML text into a tree of nested Python dicts using expat.

    Every ordinary element becomes a dict with a "name" key, plus
    "attributes", "children" and "cdata" keys when the element has them.
    Two element names get special treatment:

    * "code": each child element is taken to be a method; its text is
      stored under the child's tag name in the parent's "code" dict.
    * "properties": each child element is a property definition, and each
      of its own child elements becomes an entry in a dict stored under the
      property's tag name in the parent's "properties" dict.
    """
    def __init__(self):
        # Dict for the outermost element; None until one has been seen.
        self.root = None
        # Stack of the element dicts that are currently open.
        self.nodeStack = []
        # Attribute names that are removed from every element.
        self.attsToSkip = []
        # State used while inside a <code> section.
        self._inCode = False
        self._mthdName = ""
        self._mthdCode = ""
        self._codeDict = None
        # State used while inside a <properties> section.
        self._inProp = False
        self._propName = ""
        self._propData = ""
        self._propDict = None
        self._currPropAtt = ""
        self._currPropDict = None


    def StartElement(self, name, attributes):
        """SAX start element event handler"""
        if name == "code":
            # This is code for the parent element
            self._inCode = True
            # setdefault() always hands back the parent's dict, so a parent
            # that already has a "code" entry (e.g. a second <code> section)
            # gets its methods merged instead of leaving _codeDict as None.
            self._codeDict = self.nodeStack[-1].setdefault("code", {})

        elif name == "properties":
            # These are the custom property definitions
            self._inProp = True
            self._propName = ""
            self._propData = ""
            # Same reasoning as for "code": always bind to the parent's dict.
            self._propDict = self.nodeStack[-1].setdefault("properties", {})

        elif self._inCode:
            # The tag name is the name of the method being defined.
            self._mthdName = name

        elif self._inProp:
            if self._propName:
                # In the middle of a prop definition
                self._currPropAtt = name
            else:
                # First tag after <properties> or after a finished property:
                # it names a new property.
                self._propName = name
                self._currPropDict = {}
                self._currPropAtt = ""

        else:
            element = {"name": name}
            if len(attributes) > 0:
                for att in self.attsToSkip:
                    if att in attributes:
                        del attributes[att]
                element["attributes"] = attributes

            # Push element onto the stack and make it a child of parent
            if self.nodeStack:
                self.nodeStack[-1].setdefault("children", []).append(element)
            else:
                self.root = element
            self.nodeStack.append(element)


    def EndElement(self, name):
        """SAX end element event handler"""
        if self._inCode:
            if name == "code":
                self._inCode = False
                self._codeDict = None
            else:
                # End of an individual method: store its stripped text,
                # terminated by a single newline.
                mth = self._mthdCode.strip()
                if not mth.endswith("\n"):
                    mth += "\n"
                self._codeDict[self._mthdName] = mth
                self._mthdName = ""
                self._mthdCode = ""
        elif self._inProp:
            if name == "properties":
                self._inProp = False
                self._propDict = None
            elif name == self._propName:
                # End of an individual prop definition
                self._propDict[self._propName] = self._currPropDict
                self._propName = ""
            else:
                # end of a property attribute
                self._currPropDict[self._currPropAtt] = self._propData
                self._propData = self._currPropAtt = ""
        else:
            # Drop the element that just closed. Slice deletion is a no-op on
            # an empty stack, matching the tolerance of the slicing it replaces.
            del self.nodeStack[-1:]


    def CharacterData(self, data):
        """SAX character data event handler"""
        # Whitespace-only text is ignored except inside code, where it matters.
        if not (self._inCode or data.strip()):
            return
        data = data.replace("&lt;", "<")
        if self._inCode:
            self._mthdCode += data
        elif self._inProp:
            self._propData += data
        else:
            element = self.nodeStack[-1]
            element["cdata"] = element.get("cdata", "") + data


    def Parse(self, xml):
        """Parse the XML text in 'xml' and return the root element dict.

        Returns None if no ordinary element was encountered. Errors raised
        by expat (expat.ExpatError) are not caught here.
        """
        # Create a SAX parser
        Parser = expat.ParserCreate()
        # SAX event handlers
        Parser.StartElementHandler = self.StartElement
        Parser.EndElementHandler = self.EndElement
        Parser.CharacterDataHandler = self.CharacterData
        # Parse the whole text as one final chunk
        Parser.Parse(xml, 1)
        return self.root


    def ParseFromFile(self, filename):
        """Read the file 'filename' and return the result of Parse() on it."""
        # Close the file deterministically rather than leaving it to the
        # garbage collector.
        with open(filename, "r") as xmlFile:
            contents = xmlFile.read()
        return self.Parse(contents)
169
170
def xmltodict(xml, attsToSkip=[], addCodeFile=False):
    """Given an xml string or file, return a Python dictionary.

    'xml' is treated as a file path when it does not contain the platform
    line separator and names an existing path; otherwise it is parsed as raw
    XML text. Attribute names listed in 'attsToSkip' are dropped from every
    element. When 'addCodeFile' is True and 'xml' was a path, a sibling file
    named '<base>-code.py' is merged into the result if it exists; a failure
    while merging is reported on stdout and otherwise ignored.

    Raises dabo.dException.XmlException if the XML cannot be parsed.
    """
    parser = Xml2Obj()
    # Copy so the parser never shares the (mutable) default argument list.
    parser.attsToSkip = list(attsToSkip)
    if eol in xml and "<?xml" in xml:
        # Multi-line text with an XML declaration can't be a path, so skip
        # the filesystem check.
        isPath = False
    else:
        isPath = os.path.exists(xml)
    errmsg = ""
    if eol not in xml and isPath:
        # argument was a file
        try:
            ret = parser.ParseFromFile(xml)
        except expat.ExpatError as e:
            errmsg = _("The XML in '%s' is not well-formed and cannot be parsed: %s") % (xml, e)
    else:
        # argument must have been raw xml:
        try:
            ret = parser.Parse(xml)
        except expat.ExpatError:
            errmsg = _("An invalid XML string was encountered")
    if errmsg:
        raise dabo.dException.XmlException(errmsg)
    if addCodeFile and isPath:
        # Get the associated code file, if any
        codePth = "%s-code.py" % os.path.splitext(xml)[0]
        if os.path.exists(codePth):
            try:
                # Read inside a 'with' block so the handle is always closed.
                with open(codePth) as codeFile:
                    codeText = codeFile.read()
                codeDict = desUtil.parseCodeFile(codeText)
                ret["importStatements"] = codeDict.pop("importStatements", "")
                desUtil.addCodeToClassDict(ret, codeDict)
            except StandardError as e:
                # Best effort: a broken code file must not prevent the XML
                # from being returned.
                print("Failed to parse code file: %s" % (e,))
    return ret
205
206
def escQuote(val, noEscape=False, noQuote=False):
    """Return 'val' as escaped text suitable for an XML attribute value.

    Non-string values are converted with str(), and byte strings are decoded
    using the module's default encoding. Unless 'noEscape' is True,
    ampersands, both kinds of quotes and all characters above 127 are
    replaced with entities or numeric character references. Angle brackets
    are always replaced. The result is wrapped in double quotes unless
    'noQuote' is True.
    """
    if not isinstance(val, basestring):
        val = str(val)
    if not isinstance(val, unicode):
        val = unicode(val, default_encoding)

    if not noEscape:
        # Ampersands go first so the entities added below are not escaped
        # again. They are doubled up due to a quirk in wxPython and the way
        # it displays this character.
        val = val.replace("&", "&amp;&amp;")
        val = val.replace('"', '&quot;')
        val = val.replace("'", "&apos;")
        # High-order characters become numeric character references.
        val = "".join(["&#%s;" % ord(ch) if ord(ch) > 127 else ch
                for ch in val])

    # Angle brackets are replaced even when noEscape is set.
    val = val.replace("<", "&#060;")
    val = val.replace(">", "&#062;")

    wrapper = "" if noQuote else '"'
    return wrapper + val + wrapper
237
238
def dicttoxml(dct, level=0, header=None, linesep=None):
    """Given a Python dictionary, return an xml string.

    The dictionary must be in the format returned by xmltodict(): a "name"
    key, and optionally "attributes", "cdata", "code", "properties" and
    "children" keys.

    'level' is the nesting depth; it controls the tab indentation, and the
    XML header is only added at level 0. Send your own XML header in
    'header', otherwise a default one naming the module's default encoding
    will be used.

    The 'linesep' argument is a dictionary, with keys on levels, allowing
    the developer to add extra whitespace depending on the level. It is
    appended only after elements that are written with a closing tag.
    """
    indent = "\t" * level
    childIndent = "\t" * (level + 1)
    grandIndent = "\t" * (level + 2)

    attText = ""
    if "attributes" in dct:
        # "sizerInfo" values are already handled, so they are not escaped.
        attText = "".join([" %s=%s" % (key, escQuote(val, key in ("sizerInfo",)))
                for key, val in dct["attributes"].items()])
    parts = ["%s<%s%s" % (indent, dct["name"], attText)]

    hasContent = ("cdata" in dct or "children" in dct
            or "code" in dct or "properties" in dct)
    if not hasContent:
        # Nothing inside the element: write it as self-closing.
        parts.append(" />%s" % eol)
    else:
        parts.append(">")
        if "cdata" in dct:
            parts.append("%s" % dct["cdata"].replace("<", "&lt;"))

        if "code" in dct and dct["code"].keys():
            parts.append("%s%s<code>%s" % (eol, childIndent, eol))
            for mthd, cd in dct["code"].items():
                # Convert \n's in the code to eol, and make sure that the
                # code ends with a linefeed
                cd = eol.join(cd.splitlines())
                if not cd.endswith(eol):
                    cd += eol
                parts.append("%s<%s><![CDATA[%s%s]]>%s%s</%s>%s" % (grandIndent,
                        mthd, eol, cd, eol,
                        grandIndent, mthd, eol))
            parts.append("%s</code>%s" % (childIndent, eol))

        if "properties" in dct and dct["properties"].keys():
            itmTab = "\t" * (level + 3)
            parts.append("%s%s<properties>%s" % (eol, childIndent, eol))
            for prop, val in dct["properties"].items():
                parts.append("%s<%s>%s" % (grandIndent, prop, eol))
                for propItm, itmVal in val.items():
                    parts.append("%s<%s>%s</%s>%s" % (itmTab, propItm, itmVal,
                            propItm, eol))
                parts.append("%s</%s>%s" % (grandIndent, prop, eol))
            parts.append("%s</properties>%s" % (childIndent, eol))

        if "children" in dct and len(dct["children"]) > 0:
            parts.append(eol)
            for child in dct["children"]:
                parts.append(dicttoxml(child, level + 1, linesep=linesep))

        # Indent the closing tag only when it starts on a line of its own.
        soFar = "".join(parts)
        closeIndent = indent if soFar.endswith(eol) else ""
        parts = [soFar, "%s</%s>%s" % (closeIndent, dct["name"], eol)]

        if linesep:
            parts.append(linesep.get(level, ""))

    ret = "".join(parts)
    if level == 0:
        if header is None:
            header = '<?xml version="1.0" encoding="%s" standalone="no"?>%s' \
                    % (default_encoding, eol)
        ret = header + ret

    return ret
319
320
def flattenClassDict(cd, retDict=None):
    """Flatten a nested class dict into a single dict keyed on classID.

    'cd' is a nested structure such as would be created by restoring from a
    cdxml file. Every object that has a 'classID' attribute gets an entry in
    the returned dict whose value holds its "attributes", "code" and
    "properties". If the object's 'designerClass' attribute resolves to an
    existing file, those values are read from that file (and the file's
    children are flattened too); otherwise they are taken from the object
    itself. If the object has a 'superclassID' and its 'superclass'
    attribute resolves to an existing file, that file is flattened first.
    Keys already present in an existing entry for the same classID are kept.

    'retDict' is the dict being filled in; a new one is created when it is
    omitted. The same dict is returned.
    """
    if retDict is None:
        retDict = {}
    atts = cd.get("attributes", {})
    props = cd.get("properties", {})
    kids = cd.get("children", [])
    code = cd.get("code", {})
    classID = atts.get("classID", "")
    classFile = resolvePath(atts.get("designerClass", ""))
    superclass = resolvePath(atts.get("superclass", ""))
    superclassID = atts.get("superclassID", "")

    if superclassID and os.path.exists(superclass):
        # Get the superclass info
        flattenClassDict(xmltodict(superclass, addCodeFile=True), retDict)

    if classID:
        if os.path.exists(classFile):
            # Get the class info
            fileDict = xmltodict(classFile, addCodeFile=True)
            entry = {"attributes": fileDict.get("attributes", {}),
                    "code": fileDict.get("code", {}),
                    "properties": fileDict.get("properties", {})}
            fileKids = fileDict.get("children", [])
        else:
            # Not a file; most likely just a component in another class
            entry = {"attributes": atts, "code": code, "properties": props}
            fileKids = []
        # Anything already recorded for this classID takes precedence.
        entry.update(retDict.get(classID, {}))
        retDict[classID] = entry
        # Now update the child objects in the dict
        for kid in fileKids:
            flattenClassDict(kid, retDict)

    for kid in kids or []:
        flattenClassDict(kid, retDict)
    return retDict
368
369
def addInheritedInfo(src, super, updateCode=False):
    """Merge superclass information into a class container structure.

    'src' is modified in place, recursively through its "children". For each
    object that has a 'classID' attribute, the entry for that classID in
    'super' supplies base values for "attributes" and "properties"; the
    object's own values override them. When 'updateCode' is True the same
    merge is applied to "code". Objects without a classID are left as they
    are. Nothing is returned.
    """
    ownAtts = src.get("attributes", {})
    ownProps = src.get("properties", {})
    children = src.get("children", [])
    ownCode = src.get("code", {})
    classID = ownAtts.get("classID", "")

    if classID:
        inherited = super.get(classID,
                {"attributes": {}, "code": {}, "properties": {}})

        # Start from a copy of the inherited values so that the dicts held
        # in 'super' are never modified, then apply the object's own values.
        mergedAtts = inherited["attributes"].copy()
        mergedAtts.update(ownAtts)
        src["attributes"] = mergedAtts

        mergedProps = inherited.get("properties", {}).copy()
        mergedProps.update(ownProps)
        src["properties"] = mergedProps

        if updateCode:
            mergedCode = inherited["code"].copy()
            mergedCode.update(ownCode)
            src["code"] = mergedCode

    for kid in children or []:
        addInheritedInfo(kid, super, updateCode)
393
394
395
if __name__ == "__main__":
    # Round-trip smoke test: serialize a dict whose attribute values contain
    # backslashes, quotes, angle brackets and non-ASCII characters, parse the
    # XML back, and report whether the result equals the input.
    attributes = {
        "path": "c:\\temp\\name",
        "problemChars": "Welcome to <Jos\xc3\xa9's \ Stuff!>\xc2\xae".decode("latin-1"),
    }
    test_dict = {"name": "test", "attributes": attributes}
    print("test_dict: %s" % (test_dict,))
    xml = dicttoxml(test_dict)
    print("xml: %s" % (xml,))
    test_dict2 = xmltodict(xml)
    print("test_dict2: %s" % (test_dict2,))
    print("same?: %s" % (test_dict == test_dict2,))
Note: See TracBrowser for help on using the browser.