Package translate :: Package storage :: Package xml_extract :: Module generate
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.xml_extract.generate

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2002-2006 Zuza Software Foundation 
  5  # 
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21  # 
 22   
 23  import lxml.etree as etree 
 24   
 25  from translate.storage import base 
 26   
 27  from translate.misc.typecheck import accepts, IsCallable 
 28  from translate.misc.typecheck.typeclasses import Number 
 29  from translate.storage.xml_extract import misc 
 30  from translate.storage.xml_extract import extract 
 31  from translate.storage.xml_extract import unit_tree 
 32  from translate.storage.xml_name import XmlNamer 
@accepts(etree._Element)
def _get_tag_arrays(dom_node):
    """Group the direct children of dom_node by their tag name.

    Return a dictionary indexed by child tag names, where each tag is
    associated with a list of all the child nodes with that tag, in the
    order in which they appear as children of dom_node.

    >>> xml = '<a><b></b><c></c><b></b><d/></a>'
    >>> element = etree.fromstring(xml)
    >>> tag_arrays = _get_tag_arrays(element)
    >>> sorted((tag, len(nodes)) for tag, nodes in tag_arrays.items())
    [('b', 2), ('c', 1), ('d', 1)]
    >>> [node.tag for node in tag_arrays['b']]
    ['b', 'b']
    """
    child_dict = {}
    for child in dom_node:
        # setdefault creates the per-tag list on first sight of a tag, so
        # children keep their document order inside each list.
        child_dict.setdefault(child.tag, []).append(child)
    return child_dict
52
@accepts(etree._Element, unit_tree.XPathTree, IsCallable())
def apply_translations(dom_node, unit_node, do_translate):
    """Walk dom_node and unit_node in parallel and translate matching nodes.

    Each key of unit_node.children is a (tag, index) pair. The pair selects
    the index'th child of dom_node whose tag equals
    XmlNamer(dom_node).name(tag), and this function recurses into that child
    with the corresponding child unit node. Children are processed before
    the node itself.

    @param dom_node: The DOM node to update.
    @param unit_node: The unit tree node describing dom_node.
    @param do_translate: Callable invoked as do_translate(dom_node, unit)
        for every node that has an associated unit; it should replace the
        text in dom_node with the text in the unit.
    """
    tag_array = _get_tag_arrays(dom_node)
    for unit_child_index, unit_child in unit_node.children.iteritems():
        tag, index = unit_child_index
        try:
            dom_child = tag_array[XmlNamer(dom_node).name(tag)][index]
            apply_translations(dom_child, unit_child, do_translate)
        except (KeyError, IndexError):
            # KeyError: tag is not in tag_array.
            # IndexError: index is not in tag_array[tag].
            # We might want to complain to the user in the future.
            # NOTE(review): the recursive call sits inside this try, so a
            # KeyError/IndexError raised deeper in the tree (including by
            # do_translate) is silently dropped as well. Kept as-is to
            # preserve the existing best-effort behaviour.
            pass
    # If there is a translation unit associated with this unit_node...
    if unit_node.unit is not None:
        # Then invoke do_translate on the dom_node and the unit; do_translate
        # should replace the text in dom_node with the text in unit_node.
        do_translate(dom_node, unit_node.unit)
75
@accepts(IsCallable(), etree._Element, state=[Number])
def reduce_dom_tree(f, dom_node, *state):
    """Reduce the DOM tree rooted at dom_node with f via misc.reduce_tree.

    dom_node is handed to misc.reduce_tree both as the parent and as the
    starting node. Judging by the callbacks used in this module, f is
    called as f(parent_node, node, *state) and returns the updated state.

    @param f: The reducing callable.
    @param dom_node: Root of the DOM tree to traverse.
    @param state: Initial state forwarded unchanged to misc.reduce_tree.
    @return: Whatever misc.reduce_tree returns.
    """

    def child_nodes(node):
        # The node itself is used as the iterable of its children
        # (iterating an lxml element yields its child elements).
        return node

    return misc.reduce_tree(f, dom_node, dom_node, child_nodes, *state)
80
@accepts(etree._Element, etree._Element)
def find_dom_root(parent_dom_node, dom_node):
    """Return the ancestor-or-self of dom_node that is a direct child of
    parent_dom_node.

    Returns None when either argument is None, or when the top of the tree
    is reached without finding a node whose parent is parent_dom_node.

    @see: L{find_placeable_dom_tree_roots}
    """
    if parent_dom_node is None:
        return None
    candidate = dom_node
    # Climb one level at a time until a node directly below parent_dom_node
    # is found or there are no more ancestors.
    while candidate is not None:
        above = candidate.getparent()
        if above == parent_dom_node:
            return candidate
        candidate = above
    return None
@accepts(extract.Translatable)
def find_placeable_dom_tree_roots(unit_node):
    r"""For an inline placeable, find the root DOM node for the placeable in its
    parent.

    Consider the diagram. In this pseudo-ODF example, there is an inline span
    element. However, the span is contained in other tags (which we never process).
    When splicing the template DOM tree (that is, the DOM which comes from
    the XML document we're using to generate a translated XML document), we'll
    need to move DOM sub-trees around and we need the roots of these sub-trees::

        <p> This is text \/                <- Paragraph containing an inline placeable
                         <blah>            <- Inline placeable's root (which we want to find)
                         ...               <- Any number of intermediate DOM nodes
                         <span> bold text  <- The inline placeable's Translatable
                                              holds a reference to this DOM node

    @return: A dictionary built by extract.reduce_unit_tree in which every
        visited unit node is mapped to the result of L{find_dom_root} for
        its parent's DOM node and its own DOM node (which may be None).
    """
    # The docstring is a raw string because the diagram contains a
    # backslash, which would otherwise be read as an (invalid) escape.

    def set_dom_root_for_unit_node(parent_unit_node, unit_node, dom_tree_roots):
        # Record the root for this unit node and hand the accumulator back
        # so that the reduction can thread it through the whole tree.
        dom_tree_roots[unit_node] = find_dom_root(parent_unit_node.dom_node, unit_node.dom_node)
        return dom_tree_roots

    return extract.reduce_unit_tree(set_dom_root_for_unit_node, unit_node, {})


@accepts(extract.Translatable, etree._Element)
def _map_source_dom_to_doc_dom(unit_node, source_dom_node):
    r"""Create a mapping from the DOM nodes in source_dom_node which correspond to
    placeables, to DOM nodes in the XML document template (this information is obtained
    from unit_node). We are interested in DOM nodes in the XML document template which
    are the roots of placeables. See the diagram below, as well as
    L{find_placeable_dom_tree_roots}.

    XLIFF Source (below)::

        <source>This is text <g> bold text</g> and a footnote<x/></source>
                            /                                 \________
                           /                                           \
        <p>This is text<blah>...<span> bold text</span>...</blah> and <note>...</note></p>

    Input XML document used as a template (above)

    In the above diagram, the XLIFF source DOM node <g> is associated with the XML
    document DOM node <blah>, whereas the XLIFF source DOM node <x> is associated with
    the XML document DOM node <note>.

    @return: A dictionary mapping source DOM nodes to template DOM nodes.
    """
    # The docstring is a raw string: the diagram contains backslashes, one of
    # them at the end of a line, where a normal string literal would treat it
    # as a line continuation and silently join two diagram lines.
    dom_tree_roots = find_placeable_dom_tree_roots(unit_node)
    source_dom_to_doc_dom = {}

    def loop(unit_node, source_dom_node):
        # Placeables and the children of the source DOM node are paired up
        # by position; zip stops at the shorter of the two sequences.
        for child_unit_node, child_source_dom in zip(unit_node.placeables, source_dom_node):
            source_dom_to_doc_dom[child_source_dom] = dom_tree_roots[child_unit_node]
            loop(child_unit_node, child_source_dom)

    loop(unit_node, source_dom_node)
    return source_dom_to_doc_dom
@accepts(etree._Element, etree._Element)
def _map_target_dom_to_source_dom(source_dom_node, target_dom_node):
    """Pair up placeables of target_dom_node and source_dom_node that share
    the same 'id' attribute.

    We're using XLIFF placeables. The XLIFF standard requires that
    placeables have unique ids. The id of a placeable is never modified,
    so even when placeables are moved around in a translation, a placeable
    in the target text can be matched with its counterpart in the source
    text through its id.

    @return: A dictionary mapping target DOM nodes to source DOM nodes.
    """

    def map_id_to_dom_node(parent_node, node, id_to_dom_node):
        # Register the node under its 'id' attribute, if it has one.
        node_id = node.attrib.get(u'id')
        if node_id is not None:
            id_to_dom_node[node_id] = node
        return id_to_dom_node

    # First pass: index every node of the target tree by its id.
    target_nodes_by_id = reduce_dom_tree(map_id_to_dom_node, target_dom_node, {})

    def map_target_dom_to_source_dom_aux(parent_node, node, target_dom_to_source_dom):
        # A source node is recorded only when it has an id and a target
        # node with that same id was found in the first pass.
        node_id = node.attrib.get(u'id')
        if node_id is not None and node_id in target_nodes_by_id:
            target_dom_to_source_dom[target_nodes_by_id[node_id]] = node
        return target_dom_to_source_dom

    # Second pass, over the tree rooted at source_dom_node:
    #   1. Check whether the node has an 'id' attribute.
    #   2. If so, check whether that id belongs to a target DOM node.
    #   3. If so, associate the target DOM node with this source DOM node.
    return reduce_dom_tree(map_target_dom_to_source_dom_aux, source_dom_node, {})
def _build_target_dom_to_doc_dom(unit_node, source_dom, target_dom):
    """Build the mapping from target DOM nodes to template document DOM nodes.

    Two intermediate mappings are computed (source DOM -> document DOM, and
    target DOM -> source DOM) and then combined with
    misc.compose_mappings.

    @param unit_node: Unit node describing the placeables of the template.
    @param source_dom: DOM of the unit's source text.
    @param target_dom: DOM of the unit's target text.
    @return: The result of misc.compose_mappings for the two mappings.
    """
    source_to_doc = _map_source_dom_to_doc_dom(unit_node, source_dom)
    target_to_source = _map_target_dom_to_source_dom(source_dom, target_dom)
    return misc.compose_mappings(target_to_source, source_to_doc)
192
@accepts(etree._Element, {etree._Element: etree._Element})
def _get_translated_node(target_node, target_dom_to_doc_dom):
    """Look up the node that 'target_node' maps to and give it the tail
    text of 'target_node'.

    @return: The mapped node, with its tail overwritten.
    """
    mapped_node = target_dom_to_doc_dom[target_node]
    # The tail (the text following the element) comes from the translation.
    mapped_node.tail = target_node.tail
    return mapped_node
201
@accepts(etree._Element, etree._Element, {etree._Element: etree._Element})
def _build_translated_dom(dom_node, target_node, target_dom_to_doc_dom):
    """Use the "shape" of 'target_node' (which is a DOM tree) to insert nodes
    into the DOM tree rooted at 'dom_node'.

    The mapping 'target_dom_to_doc_dom' is used to map nodes from 'target_node'
    to the nodes which must be inserted into 'dom_node'.
    """
    dom_node.text = target_node.text
    # Lazily produce the nodes to insert:
    #   1. Walk over the children of target_node.
    #   2. Skip the children which map to None.
    #   3. Map each remaining child through _get_translated_node, which also
    #      copies the child's tail text onto the mapped node.
    mapped_children = (
        _get_translated_node(target_child, target_dom_to_doc_dom)
        for target_child in target_node
        if target_dom_to_doc_dom[target_child] is not None
    )
    #   4. Add all of these mapped nodes to 'dom_node'.
    dom_node.extend(mapped_children)
    # Recurse on the children of dom_node and target_node, paired by position.
    # NOTE(review): the pairing is purely positional, so it looks like it can
    # drift when a child was skipped above or when dom_node already had
    # children before the extend - confirm against callers.
    for doc_child, target_child in zip(dom_node, target_node):
        _build_translated_dom(doc_child, target_child, target_dom_to_doc_dom)
224
@accepts(IsCallable())
def replace_dom_text(make_parse_state):
    """Return a function::

       action: etree._Element x base.TranslationUnit -> None

    which takes a dom_node and a translation unit. The dom_node is rearranged
    according to the rearrangement of placeables in unit.target (relative to
    their positions in unit.source).

    @param make_parse_state: Callable taking no arguments; it is called once
        per invocation of the returned function to obtain a parse state.
    """

    @accepts(etree._Element, base.TranslationUnit)
    def action(dom_node, unit):
        """Rewrite the text of dom_node and the tails of its children from the
        unit's target, or from its source when there is no translation."""
        source_dom = unit.source_dom
        # Fall back to the source when the unit has no target DOM.
        target_dom = unit.target_dom if unit.target_dom is not None else unit.source_dom
        # Build a tree of (non-DOM) nodes which correspond to the translatable
        # DOM nodes in 'dom_node'; only the first result is used. A fresh
        # parse state is requested every time to avoid stale parse state info.
        translatables = extract.find_translatable_dom_nodes(dom_node, make_parse_state())
        unit_node = translatables[0]
        target_dom_to_doc_dom = _build_target_dom_to_doc_dom(unit_node, source_dom, target_dom)
        # The children of dom_node must be cleared out before the sub-tree
        # rooted at dom_node is reconstructed.
        dom_node[:] = []
        _build_translated_dom(dom_node, target_dom, target_dom_to_doc_dom)

    return action