Difference between revisions of "TEI2TMX.xsl"

From TEIWiki
Jump to navigation Jump to search
(tweak stylesheet)
(archive.org for the win.)
 
(2 intermediate revisions by the same user not shown)
Line 3: Line 3:
  
 
== Introduction ==
 
== Introduction ==
This is a stylesheet to convert P5 TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level tags are linked with corresp attributes to their translation in another language. The script has only been tested with bi-lingual documents and I have no reason to believe that it'll work with documents with more languages.
+
This is a stylesheet to convert [[P5]] TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level tags are linked with [http://www.tei-c.org/release/doc/tei-p5-doc/en/html/ref-att.global.linking.html @corresp] attributes to their translation in another language. The script has only been tested with bi-lingual documents and I have no reason to believe that it'll work with documents with more than two languages.
 +
 
 +
== See Also ==
 +
* [http://en.wikipedia.org/wiki/Translation_Memory_eXchange Translation Memory eXchange on Wikipedia]
 +
* [http://web.archive.org/web/20101217191035/http://www.lisa.org/Translation-Memory-e.34.0.html Translation Memory eXchange spec in archive.org] (includes DTD)
  
 
== Stylesheet ==
 
== Stylesheet ==
Line 16: Line 20:
 
    
 
    
 
   <xsl:key name="ids" match="*[@xml:id]" use="@xml:id"/>
 
   <xsl:key name="ids" match="*[@xml:id]" use="@xml:id"/>
 +
  <xsl:key name="corresp" match="*[@corresp]" use="@corresp"/>
 
    
 
    
 
   <xsl:template match="/">
 
   <xsl:template match="/">
Line 58: Line 63:
 
    
 
    
 
   <xsl:template match="*[@corresp] |
 
   <xsl:template match="*[@corresp] |
      *[concat('@',xml:id)=//@corresp] |
+
      *[key('corresp',concat('#',@xml:id))] |
 
      tei:page |
 
      tei:page |
 
      tei:note" mode="inner" >
 
      tei:note" mode="inner" >
Line 78: Line 83:
 
     <xsl:apply-templates select="*"/>
 
     <xsl:apply-templates select="*"/>
 
   </xsl:template>
 
   </xsl:template>
 
 
 
</xsl:stylesheet>
 
</xsl:stylesheet>
 
</nowiki></pre>
 
</nowiki></pre>

Latest revision as of 00:00, 23 March 2017


Introduction

This is a stylesheet to convert P5 TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level elements are linked to their translation in another language by @corresp attributes. The script has only been tested with bilingual documents, and I have no reason to believe that it will work with documents containing more than two languages.

See Also

Stylesheet

<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0"
		xmlns:tei="http://www.tei-c.org/ns/1.0"
		xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
		exclude-result-prefixes="tei">
  
  <!-- Serialise the result as UTF-8; no output method is given, so the
       processor's default applies. -->
  <xsl:output encoding="UTF-8" />

  
  <!-- "ids": every element that has an xml:id, indexed by that id.  The
       *[@corresp] template uses it to find the element a pointer refers to. -->
  <xsl:key name="ids" match="*[@xml:id]" use="@xml:id"/>
  <!-- "corresp": every element that has a corresp attribute, indexed by the
       raw attribute value (leading '#' included).  The mode="inner"
       suppression template uses it to recognise elements that some other
       element points at. -->
  <xsl:key name="corresp" match="*[@corresp]" use="@corresp"/>
  
  <!--
    Entry point: writes the TMX envelope (<tmx>, <header>, <body>) and then
    hands every element in the document that carries a corresp attribute to
    the translation-unit template.  All header values are fixed literals.
    NOTE(review): when both members of a pair point at each other, each of
    them is selected here and so yields its own <tu>; confirm that is wanted.
  -->
  <xsl:template match="/">
    <tmx version="1.4">
      <header adminlang="en"
              creationtool="TEI to TMX converter"
              creationtoolversion="0.1"
              datatype="tbx"
              o-tmf="unknown"
              segtype="block"
              srclang="en"/>
      <body>
        <!-- Same node-set as "//*[@corresp]", spelled with the explicit axis. -->
        <xsl:apply-templates select="/descendant::*[@corresp]"/>
      </body>
    </tmx>
  </xsl:template>
  
  <!--
    Builds one TMX translation unit (<tu>) for an element that carries a
    corresp attribute.

    $local  : the current element.
    $remote : the element whose xml:id equals the corresp value with every
              '#' removed, looked up through the "ids" key.  A corresp value
              that matches no xml:id (for instance one holding several
              space-separated pointers) yields an empty node-set, in which
              case the second <tuv> comes out with an empty language and an
              empty <seg>.

    The tuid is the current element's xml:id (empty when it has none).

    The language of each <tuv> is taken from the nearest ancestor-or-self
    element that has xml:lang rather than from the element's own attribute
    only: xml:lang applies to an element's descendants as well, so a
    language declared once on an enclosing element must still reach the
    <tuv>.  Elements that declare xml:lang themselves give the same result
    as before.

    The content of both segments is produced in mode="inner".
  -->
  <xsl:template match="*[@corresp]">
    <xsl:variable name="local" select="."/>
    <xsl:variable name="remote" select="key('ids',translate(./@corresp,'#',''))"/>
    <tu tuid="{@xml:id}" >
      <!-- [1] on the reverse axis picks the closest language-bearing element. -->
      <tuv xml:lang="{$local/ancestor-or-self::*[@xml:lang][1]/@xml:lang}">
        <seg>
          <xsl:apply-templates select="$local/* |$local/text()" mode="inner"/>
        </seg>
      </tuv>
      <!-- $remote[1]: ids are expected to be unique; guard against more than
           one hit so the language is always resolved from a single node. -->
      <tuv xml:lang="{$remote[1]/ancestor-or-self::*[@xml:lang][1]/@xml:lang}">
        <seg>
          <xsl:apply-templates select="$remote/*|$remote/text()" mode="inner"/>
        </seg>
      </tuv>
    </tu>
  </xsl:template>

  <!-- Default mode: any text node that is matched here is replaced by a
       single space instead of being copied. -->
  <xsl:template match="text()">
    <xsl:value-of select="' '"/>
  </xsl:template>
  
  <!-- mode="inner": text nodes are carried over to the output unchanged. -->
  <xsl:template mode="inner" match="text()">
    <xsl:copy/>
  </xsl:template>
  
  <!--
    mode="inner": produce nothing for
      * elements that have a corresp attribute of their own,
      * elements whose '#' + xml:id is the corresp value of some element
        (found through the "corresp" key),
      * tei:page and tei:note.
    The template is deliberately empty.
  -->
  <xsl:template mode="inner"
                match="tei:note
                     | tei:page
                     | *[key('corresp', concat('#', @xml:id))]
                     | *[@corresp]"/>
  
  <!--
    mode="inner": the listed elements are surrounded by one space on each
    side, so that their text does not run into the neighbouring text once the
    markup is gone.  Their child elements and text nodes are processed in the
    same mode; comments and processing instructions are not selected.
  -->
  <xsl:template mode="inner"
                match="tei:byline | tei:date | tei:docAuthor |
                       tei:lb | tei:name | tei:pubPlace">
    <xsl:value-of select="' '"/>
    <xsl:apply-templates mode="inner" select="node()[self::* or self::text()]"/>
    <xsl:value-of select="' '"/>
  </xsl:template>

  <!-- Default mode: an element contributes nothing itself; only its child
       elements are processed further (text children are not selected). -->
  <xsl:template match="*">
    <xsl:apply-templates select="child::*"/>
  </xsl:template>
</xsl:stylesheet>