| Home | Trees | Indices | Help |
|
|---|
|
|
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2006-2009 Zuza Software Foundation
5 #
6 # This file is part of the Translate Toolkit.
7 #
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
17 #
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, see <http://www.gnu.org/licenses/>.
20
21 """Parent class for LISA standards (TMX, TBX, XLIFF)"""
22
23 import re
24
25 from translate.storage import base
26 from translate.lang import data
27 try:
28 from lxml import etree
29 from translate.misc.xml_helpers import getText, getXMLlang, setXMLlang, getXMLspace, setXMLspace, namespaced
30 except ImportError, e:
31 raise ImportError("lxml is not installed. It might be possible to continue without support for XML formats.")
32
33
35 """generate match objects for all L{re_obj} matches in L{text}."""
36 start = 0
37 max = len(text)
38 while start < max:
39 m = re_obj.search(text, start)
40 if not m:
41 break
42 yield m
43 start = m.end()
44
# TODO: we can now do better with our proper placeables support
# Source patterns for the placeholder syntaxes recognised in text. Each
# pattern has exactly one capturing group spanning the whole placeholder.
# The list keeps its original length and order.
placeholders = [
    r'(%[diouxXeEfFgGcrs])',   # plain printf-style conversions: %d, %s, ...
    r'(\\+.?)',                # one or more backslashes plus the next character
    # The "$" must be escaped: unescaped it is the end-of-string anchor, so
    # the pattern could never match a positional specifier such as "%1$lx".
    r'(%[0-9]\$lx)',
    # Other positional conversions such as "%1$s". The lookahead keeps this
    # pattern from also matching the "%1$l" prefix of "%1$lx", which would
    # produce two overlapping matches for the same placeholder.
    r'(%[0-9]\$(?!lx)[a-z])',
    r'(<.+?>)',                # anything tag-like between angle brackets
]
# Compiled once at import time, in the same order as ``placeholders``.
re_placeholders = [re.compile(ph) for ph in placeholders]
49 'return list of regexp matchobjects for all place holders in the L{text}'
50 matches = []
51 for re_ph in re_placeholders:
52 matches.extend(list(_findAllMatches(text, re_ph)))
53
54 # sort them so they come sequentially
55 matches.sort(lambda a, b: cmp(a.start(), b.start()))
56 return matches
57
58
60 """
61 A single unit in the file. Provisional work is done to make several
62 languages possible.
63 """
64
65 #The name of the root element of this unit type:(termEntry, tu, trans-unit)
66 rootNode = ""
67 #The name of the per language element of this unit type:(termEntry, tu, trans-unit)
68 languageNode = ""
69 #The name of the innermost element of this unit type:(term, seg)
70 textNode = ""
71
72 namespace = None
73 _default_xml_space = "preserve"
74 """The default handling of spacing in the absence of an xml:space attribute.
75
76 This is mostly for correcting XLIFF behaviour."""
77
79 """Constructs a unit containing the given source string"""
80 self._rich_source = None
81 self._rich_target = None
82 if empty:
83 return
84 self.xmlelement = etree.Element(self.namespaced(self.rootNode))
85 #add descrip, note, etc.
86 super(LISAunit, self).__init__(source)
87
89 """Compares two units"""
90 if not isinstance(other, LISAunit):
91 return super(LISAunit, self).__eq__(other)
92 languageNodes = self.getlanguageNodes()
93 otherlanguageNodes = other.getlanguageNodes()
94 if len(languageNodes) != len(otherlanguageNodes):
95 return False
96 for i in range(len(languageNodes)):
97 mytext = self.getNodeText(languageNodes[i], getXMLspace(self.xmlelement, self._default_xml_space))
98 othertext = other.getNodeText(otherlanguageNodes[i], getXMLspace(self.xmlelement, self._default_xml_space))
99 if mytext != othertext:
100 #TODO:^ maybe we want to take children and notes into account
101 return False
102 return True
103
105 """Returns name in Clark notation.
106
107 For example namespaced("source") in an XLIFF document might return::
108 {urn:oasis:names:tc:xliff:document:1.1}source
109 This is needed throughout lxml.
110 """
111 return namespaced(self.namespace, name)
112
114 languageNodes = self.getlanguageNodes()
115 if len(languageNodes) > 0:
116 self.xmlelement.replace(languageNodes[0], dom_node)
117 else:
118 self.xmlelement.append(dom_node)
119
122 source_dom = property(get_source_dom, set_source_dom)
123
125 if self._rich_source is not None:
126 self._rich_source = None
127 text = data.forceunicode(text)
128 self.source_dom = self.createlanguageNode(sourcelang, text, "source")
129
132 source = property(getsource, setsource)
133
135 languageNodes = self.getlanguageNodes()
136 assert len(languageNodes) > 0
137 if dom_node is not None:
138 if append or len(languageNodes) == 0:
139 self.xmlelement.append(dom_node)
140 else:
141 self.xmlelement.insert(1, dom_node)
142 if not append and len(languageNodes) > 1:
143 self.xmlelement.remove(languageNodes[1])
144
146 if lang:
147 return self.getlanguageNode(lang=lang)
148 else:
149 return self.getlanguageNode(lang=None, index=1)
150 target_dom = property(get_target_dom)
151
153 """Sets the "target" string (second language), or alternatively appends to the list"""
154 #XXX: we really need the language - can't really be optional, and we
155 # need to propagate it
156 if self._rich_target is not None:
157 self._rich_target = None
158 text = data.forceunicode(text)
159 #Firstly deal with reinitialising to None or setting to identical string
160 if self.gettarget() == text:
161 return
162 languageNode = self.get_target_dom(None)
163 if not text is None:
164 if languageNode is None:
165 languageNode = self.createlanguageNode(lang, text, "target")
166 self.set_target_dom(languageNode, append)
167 else:
168 if self.textNode:
169 terms = languageNode.iter(self.namespaced(self.textNode))
170 try:
171 languageNode = terms.next()
172 except StopIteration, e:
173 pass
174 languageNode.text = text
175 else:
176 self.set_target_dom(None, False)
177
179 """retrieves the "target" text (second entry), or the entry in the
180 specified language, if it exists"""
181 return self.getNodeText(self.get_target_dom(lang), getXMLspace(self.xmlelement, self._default_xml_space))
182 target = property(gettarget, settarget)
183
185 """Returns a xml Element setup with given parameters to represent a
186 single language entry. Has to be overridden."""
187 return None
188
190 """Create the text node in parent containing all the ph tags"""
191 matches = _getPhMatches(text)
192 if not matches:
193 parent.text = text
194 return
195
196 # Now we know there will definitely be some ph tags
197 start = matches[0].start()
198 pretext = text[:start]
199 if pretext:
200 parent.text = pretext
201 lasttag = parent
202 for i, m in enumerate(matches):
203 #pretext
204 pretext = text[start:m.start()]
205 # this will never happen with the first ph tag
206 if pretext:
207 lasttag.tail = pretext
208 #ph node
209 phnode = etree.SubElement(parent, self.namespaced("ph"))
210 phnode.set("id", str(i+1))
211 phnode.text = m.group()
212 lasttag = phnode
213 start = m.end()
214 #post text
215 if text[start:]:
216 lasttag.tail = text[start:]
217
219 """Returns a list of all nodes that contain per language information."""
220 return list(self.xmlelement.iterchildren(self.namespaced(self.languageNode)))
221
223 """Retrieves a languageNode either by language or by index"""
224 if lang is None and index is None:
225 raise KeyError("No criterea for languageNode given")
226 languageNodes = self.getlanguageNodes()
227 if lang:
228 for set in languageNodes:
229 if getXMLlang(set) == lang:
230 return set
231 else:#have to use index
232 if index >= len(languageNodes):
233 return None
234 else:
235 return languageNodes[index]
236 return None
237
239 """Retrieves the term from the given languageNode"""
240 if languageNode is None:
241 return None
242 if self.textNode:
243 terms = languageNode.iterdescendants(self.namespaced(self.textNode))
244 if terms is None:
245 return None
246 else:
247 return getText(terms.next(), xml_space)
248 else:
249 return getText(languageNode, xml_space)
250
253
256
257 xid = property(lambda self: self.xmlelement.attrib[self.namespaced('xid')],
258 lambda self, value: self._set_property(self.namespaced('xid'), value))
259
260 rid = property(lambda self: self.xmlelement.attrib[self.namespaced('rid')],
261 lambda self, value: self._set_property(self.namespaced('rid'), value))
262
267 createfromxmlElement = classmethod(createfromxmlElement)
268
270 """A class representing a file store for one of the LISA file formats."""
271 UnitClass = LISAunit
272 #The root node of the XML document:
273 rootNode = ""
274 #The root node of the content section:
275 bodyNode = ""
276 #The XML skeleton to use for empty construction:
277 XMLskeleton = ""
278
279 namespace = None
280
282 super(LISAfile, self).__init__(unitclass=unitclass)
283 if inputfile is not None:
284 self.parse(inputfile)
285 assert self.document.getroot().tag == self.namespaced(self.rootNode)
286 else:
287 # We strip out newlines to ensure that spaces in the skeleton don't
288 # interfere with the pretty printing of lxml
289 self.parse(self.XMLskeleton.replace("\n", ""))
290 self.setsourcelanguage(sourcelanguage)
291 self.settargetlanguage(targetlanguage)
292 self.addheader()
293 self._encoding = "UTF-8"
294
298
300 """Returns name in Clark notation.
301
302 For example namespaced("source") in an XLIFF document might return::
303 {urn:oasis:names:tc:xliff:document:1.1}source
304 This is needed throughout lxml.
305 """
306 return namespaced(self.namespace, name)
307
309 """Initialises self.body so it never needs to be retrieved from the XML again."""
310 self.namespace = self.document.getroot().nsmap.get(None, None)
311 self.body = self.document.find('//%s' % self.namespaced(self.bodyNode))
312
314 #TODO: maybe this should rather be called addsourcestring or something?
315 """Adds and returns a new unit with the given string as first entry."""
316 newunit = self.UnitClass(source)
317 self.addunit(newunit)
318 return newunit
319
321 unit.namespace = self.namespace
322 super(LISAfile, self).addunit(unit)
323 if new:
324 self.body.append(unit.xmlelement)
325
327 """Converts to a string containing the file's XML"""
328 return etree.tostring(self.document, pretty_print=True, xml_declaration=True, encoding='utf-8')
329
331 """Populates this object from the given xml string"""
332 if not hasattr(self, 'filename'):
333 self.filename = getattr(xml, 'name', '')
334 if hasattr(xml, "read"):
335 xml.seek(0)
336 posrc = xml.read()
337 xml = posrc
338 if etree.LXML_VERSION >= (2, 1, 0):
339 #Since version 2.1.0 we can pass the strip_cdata parameter to
340 #indicate that we don't want cdata to be converted to raw XML
341 parser = etree.XMLParser(strip_cdata=False)
342 else:
343 parser = etree.XMLParser()
344 self.document = etree.fromstring(xml, parser).getroottree()
345 self._encoding = self.document.docinfo.encoding
346 self.initbody()
347 assert self.document.getroot().tag == self.namespaced(self.rootNode)
348 for entry in self.document.getroot().iterdescendants(self.namespaced(self.UnitClass.rootNode)):
349 term = self.UnitClass.createfromxmlElement(entry)
350 self.addunit(term, new=False)
351
| Home | Trees | Indices | Help |
|
|---|
| Generated by Epydoc 3.0.1 on Wed May 12 18:09:06 2010 | http://epydoc.sourceforge.net |