1
"""
Convert an XML page to a dictionary.
"""
5
6 import xml.parsers.expat
7 import urllib
8
class XMLParser:
    """
    Convert an XML page to a nested dictionary.

    The document is parsed while the object is constructed. Afterwards the
    result is available in C{content}: element names (and, for elements
    carrying attributes, the value of their first attribute) are used as
    dictionary keys, and character data is stored as the value. Character
    data that C{float} accepts is stored as a float, and several values
    arriving for the same key are collected in a list.
    """

    def __init__(self, text, logger=None):
        """
        Parse the XML document and build the dictionary.

        @parameter text: XML document, handed to the expat parser
        @type text: string
        @parameter logger: optional object with a C{debug} method that
        receives progress messages; nothing is logged if it is false
        """
        self.text = text
        self.content = {}
        # Stack of dictionary keys leading from the root of ``content`` to
        # the element that is currently open.
        self.elem = []
        self.logger = logger
        p = xml.parsers.expat.ParserCreate()
        p.StartElementHandler = self.start_element
        p.EndElementHandler = self.end_element
        p.CharacterDataHandler = self.char_data
        if logger:
            logger.debug("XMLParser initialised")
        p.Parse(text)

    def start_element(self, name, attrs):
        """
        If a new element is found, add it to the dictionary.

        @parameter name: designation of the element
        @type name: string
        @parameter attrs: attributes of the element
        @type attrs: dictionary
        """
        if not self.elem:
            # First element of the document; its attributes are not recorded.
            self.content[name] = {}
            self.elem.append(name)
            return

        # Walk down to the dictionary of the element that is currently open.
        curdict = self.content
        for key in self.elem:
            curdict = curdict[key]

        if name not in curdict:
            curdict[name] = {}
        # An element nested directly inside an element of the same name is
        # not pushed a second time.
        if self.elem[-1] != name:
            self.elem.append(name)
        if attrs:
            # The value of the first attribute serves as an extra dictionary
            # level. next(iter(...)) is used because dict views cannot be
            # indexed on Python 3.
            key = next(iter(attrs.values()))
            if key not in curdict[name]:
                curdict[name][key] = {}
            self.elem.append(key)

    def end_element(self, name):
        """
        Remove the element from the queue (and everything after it).

        The first element of the queue is never removed.

        @parameter name: designation of the element
        @type name: string
        """
        # The membership test covers elements that start_element did not push
        # (same-named nesting); looking those up would raise ValueError.
        if len(self.elem) > 1 and name in self.elem:
            # Position of the last occurrence of ``name`` in the queue.
            index = len(self.elem) - 1 - self.elem[::-1].index(name)
            self.elem = self.elem[:index]

    def char_data(self, data):
        """
        Add the value of an element to the dictionary with its designation.

        Values that C{float} accepts are stored as floats, everything else is
        stored unchanged. A first value replaces the empty placeholder
        dictionary; further values for the same key are collected in a list.

        @parameter data: value of the element
        @type data: string
        """
        curdict = self.content
        for key in self.elem[:-1]:
            curdict = curdict[key]
        last = self.elem[-1]
        current = curdict[last]

        # Whitespace between tags must not replace or wrap the dictionary of
        # an element, otherwise the next start_element cannot descend into it.
        if isinstance(current, dict) and not data.strip():
            return

        try:
            data = float(data)
        except (TypeError, ValueError):
            # Not a number: keep the text as it is.
            pass

        if isinstance(current, dict) and not current:
            # Empty placeholder created by start_element: first value.
            # Tested explicitly so that a stored 0.0 is not mistaken for
            # "no value yet".
            curdict[last] = data
        elif isinstance(current, list):
            current.append(data)
        else:
            curdict[last] = [current, data]
        if self.logger:
            self.logger.debug("... %s: %s" % (self.elem, data))
88