--- src/py/xml-rewrite-2.py +++ src/py/xml-rewrite-2.py @@ -8,10 +8,14 @@ # Authors: # Saleem Abdulrasool # Petteri Räty +# Karsten Merkle # Maintainer: Gentoo Java Herd # Python based XML modifier # ChangeLog +# Karsten Merkle +# Februar 13, 2012 - elements can now be deleted. +# Februar 19, 2012 - paths for matches can now be defined # Petteri Räty (?P//)?/?(?P[^/\[]+)(?P\[@(?P[^=\]]+)(=\"?(?P[^\"]*)\"?)?\])?)') + self.matches = options.matches + self.qualifier = options.qualifier def change_elem(self, elem): - for i,attr in enumerate(self.attributes): - if self.values: - elem.setAttribute(attr, self.values[i]) - else: - try: - elem.removeAttribute(attr) - except DomRewriter.NotFoundErr: + if self.attributes: + for i,attr in enumerate(self.attributes): + if self.doAdd: + elem.setAttribute(attr, self.values[i]) + else: + try: + elem.removeAttribute(attr) + except DomRewriter.NotFoundErr: + continue + else: + elem.parentNode.removeChild(elem) + + def match_elem(self, position, index, parent): + if len(self.matches[index]) == position: + return True + match = self.pathRe.match(self.matches[index], position) + if not match: + return False + new_position = match.end() + target = match.groupdict() + t_elemet = target["elem"] + t_attribute = target["attrName"] + t_value = target["attrValue"] + t_root = target["root"] + if t_root and not position==0: + error("missformed expression at "+ target["ex"] + " in self.matches[index]") + if not t_elemet: + error("Missing element in " + options.matches[index]) + for elem in parent.getElementsByTagName(t_elemet): + if not position == 0 or t_root: + if not elem.parentNode == parent: + continue + if t_attribute: + if not elem.hasAttribute(t_attribute): continue + if t_value: + attr = elem.getAttributeNode(t_attribute) + if not attr.value == t_value: + continue + if self.match_elem(new_position, index, elem) and self.qualify_elem(0, index, elem): + if self.doAdd: + if self.modify: + elemNew = self.document.createElement(self.modify[index]) + elem.appendChild(elemNew) + elem = elemNew + if self.attributes: + elem.setAttribute(self.attributes[index], self.values[index]) + else: + if (t_attribute and self.values): + elem.setAttribute(t_attribute, self.values[index]) + else: + if t_attribute: + elem.removeAttribute(t_attribute) + else: + elem.parentNode.removeChild(elem) + return False; + + def qualify_elem(self, position, index, parent): + if not self.qualifier: + return True + if len(self.qualifier[index]) == position: + return True + match = self.pathRe.match(self.qualifier[index], position) + if not match: + return False + new_position = match.end() + target = match.groupdict() + t_elemet = target["elem"] + t_attribute = target["attrName"] + t_value = target["attrValue"] + t_root = target["root"] + if t_root: + error("qualifier must not start with root //. Qualifier uses always match path as root.") + if not t_elemet: + error("Missing element in " + options.matches[index]) + for elem in parent.getElementsByTagName(t_elemet): + if not elem.parentNode == parent: + continue + if t_attribute: + if not elem.hasAttribute(t_attribute): + return False + if t_value: + attr = elem.getAttributeNode(t_attribute) + if not attr.value == t_value: + return False + if self.qualify_elem(new_position, index, elem): + return True def process(self,in_stream,callback=None): from xml.dom.minidom import parse @@ -79,37 +167,32 @@ if callback: callback(self.document) - if not self.modify: - return - - for tag in self.modify: - matches = self.document.getElementsByTagName(tag) - if matches: - if self.index == None: - for match in matches: - self.change_elem(match) - else: - self.change_elem(matches[self.index]) + if self.matches: + for i in range(len(self.matches)): + self.match_elem(0, i, self.document) + else: + if not self.modify: + return + for tag in self.modify: + matches = self.document.getElementsByTagName(tag) + if matches: + if self.index == None: + for match in matches: + self.change_elem(match) + else: + self.change_elem(matches[self.index]) def write(self,stream): stream.write(self.document.toxml()) class StreamRewriterBase: - def __init__(self, elems, attributes, values, index, - sourceElems = [], sourceAttributes = [], sourceValues = [], - targetElems = [], targetAttributes = [], targetValues = [] ): + def __init__(self, elems, attributes, values, index): self.buffer = StringIO.StringIO() self.__write = self.buffer.write self.elems = elems or [] self.attributes = attributes or [] self.values = values or [] - self.sourceElems = sourceElems or [] - self.sourceAttributes = sourceAttributes or [] - self.sourceValues = sourceValues or [] - self.targetElems = targetElems or [] - self.targetAttributes = targetAttributes or [] - self.targetValues = targetValues or [] def p(self,str): self.__write(str.encode('utf8')) @@ -126,54 +209,40 @@ self.p(u'<%s ' % name) match = ( name in self.elems ) - matchSource = ( name in self.sourceElems ) - matchTarget = ( name in self.targetElems ) for a,v in attrs: - if not ( - (match and a in self.attributes) - or (matchSource and a in self.sourceAttributes) - or (matchTarget and a in self.targetAttributes) - ): + if not (match and a in self.attributes): self.write_attr(a,v) - if matchSource: - for i, attr in enumerate(self.sourceAttributes): - self.write_attr(attr, self.sourceValues[i]) - - if matchTarget: - for i, attr in enumerate(self.targetAttributes): - self.write_attr(attr, self.targetValues[i]) - if match: for i, attr in enumerate(self.attributes): self.write_attr(attr, self.values[i]) self.p(u'>') -class ExpatRewriter(StreamRewriterBase): - """ - The only problem with this Expat based implementation is that it does not - handle entities doctypes etc properly so for example dev-java/skinlf fails. - """ - def process(self, in_stream): - from xml.parsers.expat import ParserCreate - parser = ParserCreate() - - parser.StartElementHandler = self.start_element - parser.EndElementHandler = self.end_element - parser.CharacterDataHandler = self.char_data - parser.ParseFile(in_stream) - self.p(u'\n') - - def start_element(self, name, attrs): - StreamRewriterBase(self, name, attrs.iteritems()) - - def end_element(self,name): - self.p(u'' % name) - - def char_data(self,data): - self.p(escape(data)) +#class ExpatRewriter(StreamRewriterBase): +# """ +# The only problem with this Expat based implementation is that it does not +# handle entities doctypes etc properly so for example dev-java/skinlf fails. +# """ +# def process(self, in_stream): +# from xml.parsers.expat import ParserCreate +# parser = ParserCreate() +# +# parser.StartElementHandler = self.start_element +# parser.EndElementHandler = self.end_element +# parser.CharacterDataHandler = self.char_data +# parser.ParseFile(in_stream) +# self.p(u'\n') +# +# def start_element(self, name, attrs): +# StreamRewriterBase(self, name, attrs.iteritems()) +# +# def end_element(self,name): +# self.p(u'' % name) +# +# def char_data(self,data): +# self.p(escape(data)) from xml.sax.saxutils import XMLGenerator class SaxRewriter(XMLGenerator, StreamRewriterBase): @@ -181,12 +250,8 @@ Using Sax gives us the support for writing back doctypes and all easily and is only marginally slower than expat as it is just a tight layer over it """ - def __init__(self, elems, attributes, values, index, - sourceElems = [], sourceAttributes = [], sourceValues = [], - targetElems = [], targetAttributes = [], targetValues = []): - StreamRewriterBase.__init__(self, elems, attributes, values, index, - sourceElems, sourceAttributes, sourceValues, - targetElems, targetAttributes, targetValues) + def __init__(self, elems, attributes, values, index): + StreamRewriterBase.__init__(self, elems, attributes, values, index) XMLGenerator.__init__(self, self.buffer, 'UTF-8') def process(self, in_stream): @@ -198,29 +263,33 @@ self.start_element(name, attrs.items()) if __name__ == '__main__': + import re + usage = "XML Rewrite Python Module Version " + __version__ + "\n" usage += "Copyright 2004,2006,2007 Gentoo Foundation\n" - usage += "Distributed under the terms of the GNU General Public Lincense v2\n" + usage += "Distributed under the terms of the GNU General Public License v2\n" usage += "Please contact the Gentoo Java Team with problems.\n" usage += "\n" usage += "Usage:\n" - usage += " xml-rewrite.py [-f file] --delete [-g] -e tag [-e tag] -a attribute [-a attribute] [-i index]\n" - usage += " xml-rewrite.py [-f file] --change [-g] -e tag [-e tag] -a attribute -v value [-a attribute -v value] \\\n" - usage += " [--source-element tag] [--source-attribute attribute --source-value value] \\\n" - usage += " [--target-element tag] [--target-attribute attribute --target-value value] [-i index]\n" + usage += " xml-rewrite.py [-f file]... --delete [-g] -e tag [-e tag]... [-a attribute]... [-i index] [-m [-q ]]...\n" + usage += " xml-rewrite.py [-f file]... --change [-g] -e tag [-e tag]... -a attribute -v value [-a attribute -v value]... [-m [-q ]]... \\\n" usage += "Or:\n" usage += " xml-rewrite.py [-f file] -g\n" usage += "\n" usage += "If the -f parameter is not utilized, the script will read and\n" usage += "write to stdin and stdout respectively. The use of quotes on\n" usage += "parameters will break the script.\n" + usage += " -'xy[@tz=\"main\"/qr` will match all 'qr' elements of all 'xy' elements which 'tz' attributes value is main\n" + usage += "Matching:\n" + usage += "-c with -v only : only existing attribute defined by match-path will be changed\n" + usage += "-c with -v and -a : the given attribute is added/changed to the element defined by match-path\n" + usage += "-c with -e only : the given element is added to the element defined by match-path\n" def error(message): print "ERROR: " + message sys.exit(1) - # if len(sys.argv) == 1: # usage(True) @@ -232,18 +301,29 @@ make_option ("-e", "--element", action="append", dest="elements", help="Tag of the element of which the attributes to be changed. These can be chained for multiple elements."), make_option ("-a", "--attribute", action="append", dest="attributes", help="Attribute of the matching elements to change. These can be chained for multiple value-attribute pairs"), make_option ("-v", "--value", action="append", dest="values", help="Value to set the attribute to."), - make_option ("-r", "--source-element", action="append", dest="source_elements", help="Tag of the element of which the attributes to be changed just in source scope. These can be chained for multiple elements."), - make_option ("-t","--source-attribute", action="append", dest="source_attributes", help="Attribute of the matching elements to change. These can be chained for multiple value-attribute pairs (for source only)"), - make_option ("-y", "--source-value", action="append", dest="source_values", help="Value to set the attribute to. (sourceonly)"), - make_option ("-j", "--target-element", action="append", dest="target_elements", help="Tag of the element of which the attributes to be changed just in target scope. These can be chained for multiple elements."), - make_option ("-k", "--target-attribute", action="append", dest="target_attributes", help="Attribute of the matching elements to change. These can be chained for multiple value-attribute pairs (for targetonly)"), - make_option ("-l", "--target-value", action="append", dest="target_values", help="Value to set the attribute to (targeronly)."), - make_option ("-i", "--index", type="int", dest="index", help="Index of the match. If none is specified, the changes will be applied to all matches within the document. Starts from zero.") +# make_option ("-r", "--source-element", action="append", dest="source_elements", help="Tag of the element of which the attributes to be changed just in source scope. These can be chained for multiple elements."), +# make_option ("-t", "--source-attribute", action="append", dest="source_attributes", help="Attribute of the matching elements to change. These can be chained for multiple value-attribute pairs (for source only)"), +# make_option ("-y", "--source-value", action="append", dest="source_values", help="Value to set the attribute to. (sourceonly)"), +# make_option ("-j", "--target-element", action="append", dest="target_elements", help="Tag of the element of which the attributes to be changed just in target scope. These can be chained for multiple elements."), +# make_option ("-k", "--target-attribute", action="append", dest="target_attributes", help="Attribute of the matching elements to change. These can be chained for multiple value-attribute pairs (for targetonly)"), +# make_option ("-l", "--target-value", action="append", dest="target_values", help="Value to set the attribute to (targeronly)."), + make_option ("-r", "--source-element", action="append", dest="elements", help="Deprecated. please use -e or --element"), + make_option ("-t", "--source-attribute", action="append", dest="attributes", help="Deprecated. please use -a or --attribute"), + make_option ("-y", "--source-value", action="append", dest="values", help="Deprecated. please use -v or --value"), + make_option ("-j", "--target-element", action="append", dest="elements", help="Deprecated. please use -e or --element"), + make_option ("-k", "--target-attribute", action="append", dest="attributes", help="Deprecated. please use -a or --attribute"), + make_option ("-l", "--target-value", action="append", dest="values", help="Deprecated. please use -v or --value"), + make_option ("-i", "--index", type="int", dest="index", help="Index of the match. If none is specified, the changes will be applied to all matches within the document. Starts from zero."), + make_option ("-m", "--match", action="append", dest="matches", help="match a node by the given path. If -c is set, -e and/or -a with -v are set/appended to path. If -d is set nodes matching path are deleted"), + make_option ("-q", "--qualify", type="string", action="append", dest="qualifier", help="qualifies any node if given path matches to -m matched node as base. All other -m matching nodes are ignored") ] parser = OptionParser(usage, options_list) (options, args) = parser.parse_args() + # TODO -r -j can be appended to elements since nothing would change + # TODO -t -k can be appended to attributes since nothing would change + # TODO -y -l can be appended to values since nothing would change # Invalid Arguments Must be smited! if not options.doAdd and not options.doDelete and not options.gentoo_classpath: @@ -255,33 +335,40 @@ if options.doAdd and options.doDelete: error("Unable to perform multiple actions simultaneously.") - if not options.elements and not options.target_elements and not options.source_elements: - error("At least one element (global, source only or target only) and attribute must be specified.") + if not options.elements and not options.matches: + error("At least one element must be specified or given by match") - for elem in ( options.source_attributes or [] ): - if elem in ( options.attributes or [] ): - error("You can't set an attribute in global and source scope at the same time") - - for elem in ( options.target_attributes or [] ): - if elem in ( options.attributes or [] ): - error("You can't set an attribute in global and target scope at the same time") - - if options.doAdd and (len(options.values or []) != len(options.attributes or []) - or len(options.source_values or [] ) != len(options.source_attributes or []) - or len(options.target_values or [] ) != len(options.target_attributes or [])): + if options.doAdd and not options.matches and len(options.values or []) != len(options.attributes or []): error("You must give attribute(s)/value(s) for every element you are changing.") + if options.doDelete and options.matches and (options.elements or options.attributes or options.values): + error("You can't set -e, -a or -v while using a match to delete") + + if options.matches and options.elements and (len(options.matches or []) != len(options.elements or [])): + error("You must give a element for every match you define") + + if options.matches and options.values and options.elements and not options.attributes: + error("you can't define new Element and value without an attribute") + + if options.matches and options.values and (len(options.matches or []) != len(options.values or [])): + error("You must give a value for every match you define") + + if options.matches and options.qualifier and (len(options.matches or []) != len(options.qualifier or [])): + error("You must give a qualifier for every match you define") + + if options.matches and options.attributes and (len(options.attributes or []) != len(options.values or [])): + error("You must give a value for each attribute you give") + + # End Invalid Arguments Check def get_rewriter(options): - if options.index or options.doDelete or options.gentoo_classpath: + if options.index or options.doDelete or options.gentoo_classpath or options.matches: # java-ant-2.eclass does not use these options so we can optimize the ExpatWriter # and let the DomRewriter do these. Also keeps the index option compatible for sure. - rewriter = DomRewriter(options.elements, options.attributes, options.values, options.index) + rewriter = DomRewriter(options) else: - rewriter = SaxRewriter(options.elements, options.attributes, options.values, options.index, - options.source_elements, options.source_attributes, options.source_values, - options.target_elements, options.target_attributes, options.target_values) + rewriter = SaxRewriter(options.elements, options.attributes, options.values, options.index) return rewriter