Source code for lxmlutil.etree

from lxml import etree
import copy
import re


class ElementBase(etree.ElementBase):
    """Extension of ``etree.ElementBase`` with prefix-aware helper methods.

    The helpers accept prefixed names such as ``'c:chart'`` and resolve the
    prefix through the element's ``nsmap`` (or an explicitly supplied map)
    before delegating to the regular lxml API.
    """

    @property
    def ns(self):
        """Readonly property.

        :returns: namespace of the element
        """
        return etree.QName(self).namespace

    def qn(self, name, nsmap=None):
        """Handy method to get a qualified name from given arguments

        :param name: prefixed or un-prefixed element name.
        :param nsmap: dict with key as prefix and value as namespace.
            If nsmap is None, self.nsmap is used.
        :returns: fully qualified name (qn) in ``{namespace}local`` form.
            If name has no prefix and nsmap has no ``None`` key, name is
            returned unchanged.
        :raises KeyError: if the prefix of name is not a key of nsmap
        :raises ValueError: if name contains more than one ``':'``

        Example::

            # when self.nsmap -> {}
            self.qn('some')  #-> 'some'
            self.qn('some', {None:'http://bar'})  #-> '{http://bar}some'
            self.qn('c:some', {'c':'http://bar'})  #-> '{http://bar}some'
            self.qn('c:some')  #-> KeyError: 'c'
            self.qn('c:some', {None:'http://bar'})  #-> KeyError: 'c'

            # when self.nsmap -> {None: 'http://foo/ns', 'd':'htpp://deck/ns'}
            self.qn('some')  #-> '{http://foo/ns}some'
            self.qn('some', {None:'http://bar'})  #-> '{http://bar}some'
            self.qn('c:some', {'c':'http://bar'})  #-> '{http://bar}some'
            self.qn('c:some')  #-> KeyError: 'c'
            self.qn('c:some', {None:'http://bar'})  #-> KeyError: 'c'
            self.qn('d:some')  #-> '{htpp://deck/ns}some'
        """
        if nsmap is None:
            nsmap = self.nsmap
        if ':' in name:
            pfx, name = name.split(':')
        else:
            pfx = None
        if pfx is None and None not in nsmap:
            return name
        return '{%s}%s' % (nsmap[pfx], name)

    def _attr_qn(self, name, nsmap=None):
        """Qualify an attribute name without applying the default namespace.

        An un-prefixed attribute is in no namespace even when a default
        namespace is declared, so the ``None`` entry is dropped from the map
        before the name is resolved (the same rule ``_resolve_xpath`` applies
        to attribute predicates).

        :param name: prefixed or un-prefixed attribute name
        :param nsmap: prefix map; if None, self.nsmap is used
        :returns: qualified attribute name
        :raises KeyError: if the prefix of name is not a key of nsmap
        """
        if nsmap is None:
            nsmap = self.nsmap
        prefixed_only = {k: v for k, v in nsmap.items() if k is not None}
        return self.qn(name, prefixed_only)

    @property
    def ln(self):
        """Handy readonly property to get a local name of this element

        :returns: local name

        Example::

            # self.tag == '{http://some/ns}elemTag' -> self.ln == 'elemTag'
            # self.tag == 'chart' -> self.ln == 'chart'
        """
        return etree.QName(self).localname

    def me(self, *args, **kwargs):
        """Alias for makeelement method

        See the definition of makeelement method of etree.ElementBase for
        more details
        """
        return self.makeelement(*args, **kwargs)

    def meqn(self, tag, attrib=None, nsmap=None):
        """meqn is an abbreviation for makeelementqualifiedname

        :param tag: It could be plain tag or tag with prefix
            eg. 'Relationship' or 'c:chart'
        :param attrib: It is dictionary with string key and string val.
            These will be attributes of the xml element. Prefixed keys are
            resolved through nsmap; un-prefixed keys are left unqualified.
        :param nsmap: If nsmap is None then nsmap of this element is used to
            resolve the prefix of the given tag
        :returns: created element object
        :raises KeyError: if a prefix cannot be resolved through nsmap

        Example::

            # When self.nsmap == {}
            self.meqn('c:chart')  #-> KeyError
            self.meqn('foo')  #-> element obj with tag 'foo'
            self.meqn('a:foo', nsmap={'a':'http://Aaa/ns'})
            #-> <{http://Aaa/ns}foo/>
            self.meqn('a:foo', attrib={'val':'1'}, nsmap={'a':'http://Aaa/ns'})
            #-> <{http://Aaa/ns}foo val="1"/>

            # when self.nsmap == {None: 'http://foo/ns', 'd':'htpp://deck/ns'}
            self.meqn('some')  #-> tag '{http://foo/ns}some'
            self.meqn('some', nsmap={None:'http://bar'})
            #-> tag '{http://bar}some'
            self.meqn('c:some', nsmap={'c':'http://bar'})
            #-> tag '{http://bar}some'
            self.meqn('c:some')  #-> KeyError: 'c'
            self.meqn('c:some', nsmap={None:'http://bar'})  #-> KeyError: 'c'
            self.meqn('d:some')  #-> tag '{htpp://deck/ns}some'
        """
        if nsmap is None:
            nsmap = self.nsmap
        tag = self.qn(tag, nsmap)
        if isinstance(attrib, dict):
            attrib = {
                self._attr_qn(key, nsmap): value
                for key, value in attrib.items()
            }
        return self.makeelement(tag, attrib, nsmap)

    def dump(self, *args, **kwargs):
        """Alias method for etree.dump

        See etree.dump for more information.
        """
        return etree.dump(self, *args, **kwargs)

    def deepcopy(self, *args, **kwargs):
        """Alias method for copy.deepcopy.

        See copy.deepcopy for more information

        :returns: duplicate of this element
        """
        return copy.deepcopy(self, *args, **kwargs)

    def getqn(self, key, default=None):
        """Similar to self.get with additional flexibility.

        It can resolve the key with prefixes. An un-prefixed key is looked
        up as-is, without the default namespace.

        :param key: element's attribute name
        :param default: this value would be returned when element do not
            have key attribute
        :returns: value or default of the key attribute of this element
        :raises KeyError: if the prefix of key is not in self.nsmap

        Example::

            # for element <c:somename val="0" r:id="rId3"/>
            e.getqn("val")  #-> "0"
            e.getqn("r:id")  #-> "rId3"
        """
        return self.get(self._attr_qn(key), default)

    def setqn(self, key, value):
        """Similar to self.set with additional flexibility.

        It can resolve the key with prefixes. An un-prefixed key is set
        as-is, without the default namespace.

        :param key: element's attribute name
        :param value: key attribute of this element is set with the value
        :raises KeyError: if the prefix of key is not in self.nsmap

        Example::

            # for element <c:somename val="0" r:id="rId3"/>
            e.setqn("val", "1")
            e.setqn("r:id", "rId5")
            # element would be now <c:somename val="1" r:id="rId5"/>
        """
        self.set(self._attr_qn(key), value)

    def findallqn(self, path, namespaces=None):
        """Similar to self.findall() with additional functionality of
        resolving path with prefixes

        :param path: prefixed name paths, normal paths
        :param namespaces: dictionary of prefix as key and namespace as value
        :return: list of elements matching the given parameters

        Examples::

            e.findallqn("c:chart//c:areaChart/c:axId")
            # [<Element {http://schemas.openxmlformats.org/drawingml/2006/
            #   chart}axId at 0x189d739f7a0>,
            #  <Element {http://schemas.openxmlformats.org/drawingml/2006/
            #   chart}axId at 0x189d739f7f0>]
            e.findallqn('./c:chart//c:areaChart/c:axId[@val="123456"]')
        """
        path = self._resolve_xpath(path, namespaces)
        return self.findall(path)

    def findqn(self, path, namespaces=None):
        """Similar to self.find() with additional functionality of resolving
        path with prefixes.

        :param path: prefixed name paths, normal paths
        :param namespaces: dictionary of prefix as key and namespace as value
        :return: return value of self.find() for the resolved path

        Example::

            # Following paths would be resolved to paths as shown below and
            # passed to self.find() method
            'Relationship'    # -> 'Relationship'
            './Relationship'  # -> './Relationship'
            './/dummy'        # -> './/dummy'

            # when self.nsmap = {'c':'http://cee/ns', 'r':'http://ree/ns'}
            './/c:autoUpdate'  # -> './/{http://cee/ns}autoUpdate'
            './c:chart//'      # -> './{http://cee/ns}chart//'
            './/c16:uniqueId'  # -> './/{http://c16/ns}uniqueId'
                               #    when namespaces = {'c16':'http://c16/ns'}
            './c:chart//c:axId[@val="505253232"]'
            # -> './{http://cee/ns}chart//{http://cee/ns}axId[@val="505253232"]'
        """
        path = self._resolve_xpath(path, namespaces)
        return self.find(path)

    def rm(self):
        """Removes the current element from its parent.

        Limitation: it throws AttributeError when trying to remove root
        element
        """
        self.getparent().remove(self)

    def __getattr__(self, name, *args, **kwargs):
        """Find a descendant element described by an attribute-style name.

        This special method is called when there is no attribute with name
        <name> in the element object. The name is translated to one or more
        ``find()`` calls:

        * three underscores stand for ``.//`` (any depth)
        * two underscores stand for ``./`` (direct child)
        * one underscore separates the parts of a single step, which may be
          ``localname``, ``prefix_localname``, ``localname_attr``,
          ``prefix_localname_attr``, ``localname_prefix_attr`` or
          ``prefix_localname_prefix_attr``

        When a prefix is not declared on the element being searched, the
        namespaces declared for that prefix on its descendants are tried.

        Limitations: cannot be used to search for an attribute value.
        Element or attribute names containing underscores are not
        supported. Because this method returns None instead of raising
        AttributeError, ``hasattr()`` is true for any name.

        :param name: attribute name being looked up
        :returns: the matching element, or None when nothing matches or the
            name cannot be interpreted

        Examples::

            # when nsmap = {'c':'http://cee/ns', 'r':'http://ree/ns'}
            self.c_autoUpdate
            #-> self.find('./{http://cee/ns}autoUpdate')
            self.___c_chart
            #-> self.find('.//{http://cee/ns}chart')
            self.__c_chart
            #-> self.find('./{http://cee/ns}chart')
            self.c_chart___c_areaChart
            #-> self.find('./{http://cee/ns}chart')
            #       .find('.//{http://cee/ns}areaChart')
            self.c_externalData_r_id
            #-> self.find('./{http://cee/ns}externalData[@{http://ree/ns}id]')
            self.___mc_Choice
            #-> though self.nsmap does not have a namespace for prefix
            #   'mc', the descendants are scanned for declarations of 'mc'
            #   and each namespace found is tried
        """
        any_level = name.startswith('___')
        e = self
        for chunk in name.split('___'):
            if chunk == '':
                continue
            for token in chunk.split('__'):
                if token == '':
                    continue
                if e is None:
                    return None
                pre = './/' if any_level else './'
                tokens = token.split('_')
                # Empty parts (stray underscores) or more than four parts
                # do not correspond to any supported step form.
                if '' in tokens or len(tokens) > 4:
                    return None
                e = self._find_by_tokens(e, pre, tokens)
                # Only the first step after '___' searches at any depth.
                any_level = False
            any_level = True
        return e

    def _find_by_tokens(self, context, pre, tokens):
        """Resolve one underscore-separated step of ``__getattr__``.

        :param context: element the search starts from
        :param pre: ``'./'`` or ``'.//'``
        :param tokens: the one to four non-empty parts of the step
        :returns: the first matching element or None
        """
        nsmap = context.nsmap
        count = len(tokens)

        if count == 1:
            return context.find(pre + tokens[0])

        if count == 2:
            first, second = tokens
            if first in nsmap:
                # prefix_localname
                return context.find(pre + '{' + nsmap[first] + '}' + second)
            # localname_attr
            found = context.find(pre + first + '[@' + second + ']')
            if found is None:
                # Treat it as prefix_localname with the prefix declared on a
                # descendant only.
                for ns in self._get_lst_ns_from_children(context, first):
                    found = context.find(pre + '{' + ns + '}' + second)
                    if found is not None:
                        break
            return found

        if count == 3:
            first, second, attr = tokens
            if first in nsmap:
                # prefix_localname_attr
                return context.find(
                    pre + '{' + nsmap[first] + '}' + second + '[@' + attr + ']')
            if second in nsmap:
                # localname_prefix_attr
                return context.find(
                    pre + first + '[@{' + nsmap[second] + '}' + attr + ']')
            # Neither candidate prefix is in scope here: try the namespaces
            # declared on descendants, always searching from `context`.
            for ns in self._get_lst_ns_from_children(context, first):
                found = context.find(
                    pre + '{' + ns + '}' + second + '[@' + attr + ']')
                if found is not None:
                    return found
            for ns in self._get_lst_ns_from_children(context, second):
                found = context.find(
                    pre + first + '[@{' + ns + '}' + attr + ']')
                if found is not None:
                    return found
            return None

        # count == 4: prefix_localname_prefix_attr
        pfx, ln, pfx2, attr = tokens
        if pfx in nsmap:
            lst_ns = [nsmap[pfx]]
        else:
            lst_ns = self._get_lst_ns_from_children(context, pfx)
        if pfx2 in nsmap:
            lst_ns2 = [nsmap[pfx2]]
        else:
            lst_ns2 = self._get_lst_ns_from_children(context, pfx2)
        for ns in lst_ns:
            for ns2 in lst_ns2:
                found = context.find(
                    pre + '{' + ns + '}' + ln + '[@{' + ns2 + '}' + attr + ']')
                if found is not None:
                    return found
        # No candidate namespace, or no match: report "not found" rather
        # than handing back the context element.
        return None

    def _get_lst_ns_from_children(self, parent, pfx):
        """Supporting method to get the list of namespaces for the given
        prefix

        It iterates over ``parent.iter()`` and collects, in order of first
        appearance and without duplicates, every namespace mapped to pfx.

        :param parent: element whose subtree is scanned
        :param pfx: namespace prefix to look for
        :returns: list of distinct namespace URIs
        """
        lst = []
        seen = set()
        for e in parent.iter():
            nsmap = e.nsmap
            if pfx in nsmap:
                ns = nsmap[pfx]
                if ns not in seen:
                    seen.add(ns)
                    lst.append(ns)
        return lst

    @staticmethod
    def _split_steps(path):
        """Split a path on ``/`` separators.

        A ``/`` inside ``{...}`` (a namespace URI), inside a ``[...]``
        predicate, or inside a quoted predicate value is not a separator.
        An empty step marks a doubled separator, so ``'/'.join(steps)``
        reproduces the original path.

        :param path: path string
        :returns: list of step strings
        """
        steps = []
        current = []
        in_braces = False
        bracket_depth = 0
        quote = None
        for ch in path:
            if quote is not None:
                if ch == quote:
                    quote = None
            elif in_braces:
                if ch == '}':
                    in_braces = False
            elif ch == '{':
                in_braces = True
            elif ch == '[':
                bracket_depth += 1
            elif ch == ']':
                bracket_depth = max(bracket_depth - 1, 0)
            elif bracket_depth and ch in '"\'':
                quote = ch
            elif ch == '/' and not bracket_depth:
                steps.append(''.join(current))
                current = []
                continue
            current.append(ch)
        steps.append(''.join(current))
        return steps

    def _resolve_xpath(self, path, namespaces):
        """Resolves the prefixes in the given path and return a fully
        qualified path

        Element names are resolved with self.nsmap updated by namespaces
        (including the default namespace). The attribute name of a leading
        ``[@attr...]`` predicate is resolved without the default namespace,
        and is left untouched when its prefix is unknown. Steps already
        written as ``{namespace}name`` are passed through unchanged.

        :param path: prefixed or plain path
        :param namespaces: optional dict of prefix -> namespace that
            extends or overrides self.nsmap
        :returns: path with prefixes replaced by ``{namespace}``
        :raises KeyError: if the prefix of an element name is unknown

        Example::

            'Relationship'    #-> 'Relationship'
            './Relationship'  #-> './Relationship'
            './/dummy'        #-> './/dummy'

            # when self.nsmap = {'c':'http://cee/ns', 'r':'http://ree/ns'}
            './/c:autoUpdate'  #-> './/{http://cee/ns}autoUpdate'
            './c:chart//'      #-> './{http://cee/ns}chart//'
            './/c16:uniqueId'  #-> './/{http://c16/ns}uniqueId'
                               #   when namespaces={'c16':'http://c16/ns'}
            './c:chart//c:axId[@val="505253232"]'
            #-> './{http://cee/ns}chart//{http://cee/ns}axId[@val="505253232"]'
        """
        nsmap = dict(self.nsmap)
        if isinstance(namespaces, dict):
            nsmap.update(namespaces)
        # Attributes never take the default namespace.
        attr_nsmap = {k: v for k, v in nsmap.items() if k is not None}

        resolved = []
        for step in self._split_steps(path):
            name, bracket, predicate = step.partition('[')
            remain = ''
            if bracket:
                m = re.match(r'@([a-zA-Z:0-9]*)(.*)?', predicate)
                if m:
                    attr = m.group(1)
                    try:
                        attr = self.qn(attr, nsmap=attr_nsmap)
                    except KeyError:
                        # Unknown attribute prefix: keep the name as written.
                        pass
                    remain = '[@{}{}'.format(attr, m.group(2))
                else:
                    remain = '[' + predicate
            if name.isalnum() or re.match(r'\w+:\w+', name):
                name = self.qn(name, nsmap=nsmap)
            resolved.append(name + remain)
        return '/'.join(resolved)