summaryrefslogtreecommitdiff
blob: 08b9cbbcc0bbdf58c3cdf0beb16b7c156dfaaeec (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
Index: src/bsfix/xml-rewrite-2.py
===================================================================
--- src/bsfix/xml-rewrite-2.py	(revision 3421)
+++ src/bsfix/xml-rewrite-2.py	(working copy)
@@ -3,15 +3,17 @@
 
 # Copyright 2004-2006 Gentoo Foundation
 # Distributed under the terms of the GNU General Public Licence v2
-# $Header: /var/cvsroot/gentoo-src/javatoolkit/src/bsfix/xml-rewrite.py,v 1.6 2005/07/19 10:35:18 axxo Exp $
 
-# Author: Saleem Abdulrasool <compnerd@compnerd.org>
+# Authors: 
+#	Saleem Abdulrasool <compnerd@compnerd.org>
+#	Petteri Räty <betelgeuse@gentoo.org>
 # Maintainer: Gentoo Java Herd <java@gentoo.org>
 # Python based XML modifier
 
 # ChangeLog
 # Petteri Räty <betelgeuse@gentoo.org
 #	   December 06, 2006 - Changed to use xml.parsers.expat and basically rewrote the whole file
+#	   December 29, 2006 - Added a SAX based implementation to handle entities etc ( test on dev-java/skinlf )
 # Saleem A. <compnerd@compnerd.org>
 #	   December 23, 2004 - Initial Write
 #	   December 24, 2004 - Added usage information
@@ -26,6 +28,11 @@
 __version__ = "$Revision: 1.7 $"[11:-2]
 
 class DomRewriter:
+	"""
+	The old DOM rewriter is still around for index-based rewriting. It can
+	handle all the complex cases, but features needed by Portage should
+	live in StreamRewriterBase subclasses, as they are much faster.
+	"""
 	from xml.dom import NotFoundErr
 
 	def __init__(self, modifyElems, attributes, values=None, index=None):
@@ -61,40 +68,32 @@
 	def write(self,stream):
 		stream.write(self.document.toxml())
 
-class ExpatRewriter:
+class StreamRewriterBase:
 
 	def __init__(self, elems, attributes, values, index):
 		self.buffer = StringIO.StringIO()
-		self.p = self.buffer.write
+		self.__write = self.buffer.write
 		self.elems = elems
 		self.attributes = attributes
 		self.values = values
 
-	def process(self, in_stream):
-		from xml.parsers.expat import ParserCreate
-		parser = ParserCreate()
+	def p(self,str):
+		self.__write(str.encode('utf8'))
 
-		parser.StartElementHandler = self.start_element
-		parser.EndElementHandler = self.end_element
-		parser.CharacterDataHandler = self.char_data
-		parser.ParseFile(in_stream)
-		self.p('\n')
-
 	def write(self, out_stream):
-		out_stream.write(self.buffer.getvalue())
-		self.buffer.close()
-		self.buffer = StringIO.StringIO()
-		self.p = self.buffer.write
+		value = self.buffer.getvalue()
+		out_stream.write(value)
+		self.buffer.truncate(0)
 
 	def write_attr(self,a,v):
-		self.buffer.write('%s=%s ' % (a,quoteattr(v)))
+		self.p(u'%s=%s ' % (a,quoteattr(v, {u'©':'&#169;'})))
 
 	def start_element(self, name, attrs):
-		self.p('<%s ' % name)
+		self.p(u'<%s ' % name)
 
 		match = ( name in self.elems )
 		
-		for a,v in attrs.iteritems():
+		for a,v in attrs:
 			if not ( match and a in self.attributes ):
 				self.write_attr(a,v)
 		
@@ -102,14 +101,50 @@
 			for i, attr in enumerate(self.attributes):
 				self.write_attr(attr, self.values[i])
 
-		self.p('>')
+		self.p(u'>')
 
+class ExpatRewriter(StreamRewriterBase):
+	"""
+	The only problem with this Expat-based implementation is that it does not
+	handle entities, doctypes, etc. properly, so for example dev-java/skinlf fails.
+	"""
+	def process(self, in_stream):
+		from xml.parsers.expat import ParserCreate
+		parser = ParserCreate()
+
+		parser.StartElementHandler = self.start_element
+		parser.EndElementHandler = self.end_element
+		parser.CharacterDataHandler = self.char_data
+		parser.ParseFile(in_stream)
+		self.p(u'\n')
+	
+	def start_element(self, name, attrs):
+		StreamRewriterBase.start_element(self, name, attrs.iteritems()) # base class iterates (name, value) pairs
+
 	def end_element(self,name):
-		self.p('</%s>' % name)
+		self.p(u'</%s>' % name)
 
 	def char_data(self,data):
 		self.p(escape(data))
 
+from xml.sax.saxutils import XMLGenerator
+class SaxRewriter(XMLGenerator, StreamRewriterBase):
+	"""
+	Using SAX lets us write back doctypes and the like easily, and it is
+	only marginally slower than expat as it is just a thin layer over it.
+	"""
+	def __init__(self, elems, attributes, values, index):
+		StreamRewriterBase.__init__(self, elems, attributes, values, index)
+		XMLGenerator.__init__(self, self.buffer, 'UTF-8')
+
+	def process(self, in_stream):
+		from xml.sax import parse
+		parse(in_stream, self)
+		self.p(u'\n')
+
+	def startElement(self, name, attrs):
+		self.start_element(name, attrs.items())
+
 if __name__ == '__main__':
 	usage = "XML Rewrite Python Module Version " + __version__ + "\n"
 	usage += "Copyright 2004 Gentoo Foundation\n"
@@ -166,8 +201,6 @@
 		error("You must give value for every attribute you are changing.")
 	# End Invalid Arguments Check
 	
-	import codecs
-	
 	def get_rewriter(options):
 		if options.index or options.doDelete:
 			# java-ant-2.eclass does not use these options so we can optimize the ExpatWriter 
@@ -175,23 +208,32 @@
 			rewriter = DomRewriter(options.elements, options.attributes, options.values, options.index)
 			print "Using DOM to rewrite the build.xml files"
 		else:
-			rewriter = ExpatRewriter(options.elements, options.attributes, options.values, options.index)
-			print "Using Expat to rewrite the build.xml files"
+			rewriter = SaxRewriter(options.elements, options.attributes, options.values, options.index)
+			print "Using Sax to rewrite the build.xml files"
 
 		return rewriter
 	
 	rewriter = get_rewriter(options)
 
 	if options.files:
+		import os
 		for file in options.files:
 			print "Rewriting %s" % file
-			f = open(file,"r")
+			# First parse the file into memory
+			# Changing cwd is needed so that relative includes of other XML files from build.xml files resolve
+			cwd = os.getcwd()
+			dirname = os.path.dirname(file)
+			if dirname != '': # dirname is '' for a bare file name such as build.xml
+				os.chdir(dirname)
+			f = open(os.path.basename(file),"r")
 			rewriter.process(f)
+			os.chdir(cwd)
 			f.close()		
+			# Then write it back to the file
 			f = open(file, "w")
-			rewriter.write(codecs.getwriter('utf-8')(f))
+			rewriter.write(f)
 			f.close()
 	else:
 		rewriter.process(sys.stdin)
-		rewriter.write(codecs.getwriter('utf-8')(sys.stdout))
+		rewriter.write(sys.stdout)