Difference between revisions of "TEI2TMX.xsl"

From TEIWiki
Jump to navigation Jump to search
(now with proper use of indexes)
(archive.org for the win.)
 
(One intermediate revision by the same user not shown)
Line 3: Line 3:
  
 
== Introduction ==
 
== Introduction ==
This is a stylesheet to convert P5 TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level tags are linked with corresp attributes to their translation in another language. The script has only been tested with bi-lingual documents and I have no reason to believe that it'll work with documents with more languages.
+
This is a stylesheet to convert [[P5]] TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level tags are linked with [http://www.tei-c.org/release/doc/tei-p5-doc/en/html/ref-att.global.linking.html @corresp] attributes to their translation in another language. The script has only been tested with bi-lingual documents and I have no reason to believe that it'll work with documents with more than two languages.
 +
 
 +
== See Also ==
 +
* [http://en.wikipedia.org/wiki/Translation_Memory_eXchange Translation Memory eXchange on Wikipedia]
 +
* [http://web.archive.org/web/20101217191035/http://www.lisa.org/Translation-Memory-e.34.0.html Translation Memory eXchange spec in archive.org] (includes DTD)
  
 
== Stylesheet ==
 
== Stylesheet ==

Latest revision as of 00:00, 23 March 2017


Introduction

This is a stylesheet to convert P5 TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level elements are linked to their translation in another language by @corresp attributes. The script has only been tested with bilingual documents, and I have no reason to believe that it will work with documents in more than two languages.

See Also

Stylesheet

<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0"
		xmlns:tei="http://www.tei-c.org/ns/1.0"
		xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
		exclude-result-prefixes="tei">
  
  <!-- Serialise the result tree in UTF-8. -->
  <xsl:output encoding="UTF-8"/>

  <!-- 'corresp': every element that has a corresp attribute, indexed by
       the literal attribute value (including any leading '#'). -->
  <xsl:key name="corresp" use="@corresp" match="*[@corresp]"/>

  <!-- 'ids': every element that has an xml:id, indexed by that id. -->
  <xsl:key name="ids" use="@xml:id" match="*[@xml:id]"/>
  
  <!-- Source language written to the TMX header. Defaults to 'en', the
       value that used to be hard-coded; override it when the source text
       of the document is in another language. -->
  <xsl:param name="srclang" select="'en'"/>

  <!--
    Entry point: builds the TMX skeleton (header + body) and fills the body
    by applying templates to every element in the document that carries a
    corresp attribute. Each such element yields one translation unit, so a
    pair whose two members both carry corresp is emitted once per member.
  -->
  <xsl:template match="/">
    <tmx version="1.4">
      <!-- NOTE(review): datatype="tbx" is kept from the original. The
           segments produced by this stylesheet are plain text, so
           "plaintext" may be the intended value; confirm against the
           TMX specification before changing it. -->
      <header adminlang="en"
              creationtool="TEI to TMX converter"
              creationtoolversion="0.1"
              datatype="tbx"
              o-tmf="unknown"
              segtype="block"
              srclang="{$srclang}"/>
      <body>
        <xsl:apply-templates select="//*[@corresp]"/>
      </body>
    </tmx>
  </xsl:template>
  
  <!--
    Emits one TMX translation unit (tu) for an element carrying a corresp
    attribute. The first tuv holds the text of the element itself, the
    second the text of the element its corresp attribute points to.

    The pointer is resolved by deleting every '#' from the corresp value
    and looking the result up in the 'ids' key. If nothing is found, the
    second tuv is emitted with an empty language and an empty segment.

    Segment text is produced by the templates in mode "inner".
  -->
  <xsl:template match="*[@corresp]">
    <xsl:variable name="local" select="."/>
    <xsl:variable name="remote" select="key('ids',translate(./@corresp,'#',''))"/>

    <!-- xml:lang is inherited from ancestors, so use the nearest
         declaration rather than only the attribute on the element
         itself; otherwise the tuv language comes out empty whenever the
         language is declared higher up in the tree. -->
    <xsl:variable name="local-lang"
                  select="$local/ancestor-or-self::*[@xml:lang][1]/@xml:lang"/>
    <xsl:variable name="remote-lang"
                  select="$remote/ancestor-or-self::*[@xml:lang][1]/@xml:lang"/>

    <tu>
      <!-- Only write tuid when the element actually has an xml:id, so
           that elements without one do not all share an empty tuid. -->
      <xsl:if test="@xml:id">
        <xsl:attribute name="tuid">
          <xsl:value-of select="@xml:id"/>
        </xsl:attribute>
      </xsl:if>
      <tuv xml:lang="{$local-lang}">
        <seg>
          <xsl:apply-templates select="$local/* | $local/text()" mode="inner"/>
        </seg>
      </tuv>
      <tuv xml:lang="{$remote-lang}">
        <seg>
          <xsl:apply-templates select="$remote/* | $remote/text()" mode="inner"/>
        </seg>
      </tuv>
    </tu>
  </xsl:template>

  <!-- Default mode: a text node contributes a single space instead of
       its content. -->
  <xsl:template match="text()">
    <xsl:value-of select="' '"/>
  </xsl:template>
  
  <!-- Mode "inner": text nodes are written to the segment unchanged. -->
  <xsl:template mode="inner" match="text()">
    <xsl:value-of select="string(.)"/>
  </xsl:template>
  
  <!--
    Mode "inner": elements that must contribute nothing to a segment.
    Covered are tei:note and tei:page, any element that itself carries a
    corresp attribute, and any element that another element points to
    with corresp="#<its xml:id>".
    NOTE(review): tei:page is kept as written; it may have been meant as
    tei:pb. Confirm against the TEI documents this is used on.
  -->
  <xsl:template mode="inner"
                match="tei:note |
                       tei:page |
                       *[@corresp] |
                       *[key('corresp',concat('#',@xml:id))]"/>
  
  <!-- Mode "inner": the content of these elements is wrapped in a leading
       and a trailing space so that it does not run into the neighbouring
       text of the segment. -->
  <xsl:template mode="inner"
                match="tei:docAuthor |
                       tei:byline |
                       tei:name |
                       tei:lb |
                       tei:date |
                       tei:pubPlace">
    <xsl:value-of select="' '"/>
    <xsl:apply-templates mode="inner" select="text() | *"/>
    <xsl:value-of select="' '"/>
  </xsl:template>

  <!-- Default mode: for any other element, continue with its child
       elements only; its text nodes are not visited. -->
  <xsl:template match="*">
    <xsl:apply-templates select="child::*"/>
  </xsl:template>
</xsl:stylesheet>