TEI2TMX.xsl

From TEIWiki

Jump to: navigation, search


Introduction

This is a stylesheet to convert P5 TEI parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level tags are linked by @corresp attributes to their translations in another language. The script has only been tested with bilingual documents, and there is no reason to believe that it will work with documents containing more than two languages.

See Also

Stylesheet

<?xml version="1.0" encoding="UTF-8"?>
<!--
  TEI2TMX.xsl

  Converts a TEI P5 parallel-text document into a TMX 1.4 translation
  memory.  Every element carrying a @corresp attribute becomes one <tu>:
  the first <tuv> holds the text of that element, the second <tuv> holds
  the text of the element whose @xml:id the @corresp value points at.

  All markup inside a segment is dropped; only character data is kept.

  NOTE(review): every element with @corresp is processed, so if both
  members of a pair point at each other the pair is emitted twice (once
  per direction).  Confirm whether the input is linked one-way only.
-->
<xsl:stylesheet version="1.0"
                xmlns:tei="http://www.tei-c.org/ns/1.0"
                xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
                exclude-result-prefixes="tei">

  <xsl:output method="xml" encoding="UTF-8"/>

  <!-- Language code written to the srclang attribute of the TMX header.
       Defaults to "en", the value that used to be hard-coded. -->
  <xsl:param name="srclang" select="'en'"/>

  <!-- Look up any element by its @xml:id. -->
  <xsl:key name="ids" match="*[@xml:id]" use="@xml:id"/>
  <!-- Look up elements by the raw value of their @corresp attribute
       (including the leading "#"). -->
  <xsl:key name="corresp" match="*[@corresp]" use="@corresp"/>

  <!-- Entry point: emits the TMX skeleton and one <tu> per element that
       carries @corresp, in document order. -->
  <xsl:template match="/">
    <tmx version="1.4">
      <!-- datatype is "plaintext" because the segments produced below
           contain character data only; all inline markup is removed. -->
      <header adminlang="en"
              creationtool="TEI to TMX converter"
              creationtoolversion="0.1"
              datatype="plaintext"
              o-tmf="unknown"
              segtype="block"
              srclang="{$srclang}"/>
      <body>
        <xsl:apply-templates select="//*[@corresp]"/>
      </body>
    </tmx>
  </xsl:template>

  <!-- Builds one translation unit from the current element and the element
       its @corresp points at.  The pointer is resolved by deleting every
       "#" from @corresp and using the remainder as an @xml:id. -->
  <xsl:template match="*[@corresp]">
    <xsl:variable name="remote"
                  select="key('ids', translate(@corresp, '#', ''))"/>
    <tu tuid="{@xml:id}">
      <!-- xml:lang is inherited in XML, so take it from the nearest
           ancestor-or-self that declares it, not only from the element
           itself. -->
      <tuv xml:lang="{ancestor-or-self::*[@xml:lang][1]/@xml:lang}">
        <seg>
          <xsl:apply-templates select="* | text()" mode="inner"/>
        </seg>
      </tuv>
      <!-- Skip the second variant when the pointer does not resolve inside
           this document; otherwise an empty <tuv xml:lang=""> is written. -->
      <xsl:if test="$remote">
        <tuv xml:lang="{$remote/ancestor-or-self::*[@xml:lang][1]/@xml:lang}">
          <seg>
            <xsl:apply-templates select="$remote/* | $remote/text()"
                                 mode="inner"/>
          </seg>
        </tuv>
      </xsl:if>
    </tu>
  </xsl:template>

  <!-- Default-mode text: replaced by a single space.  Not reached by the
       templates in this stylesheet, which only use mode "inner" below the
       translation-unit level; kept as a safety net. -->
  <xsl:template match="text()">
    <xsl:text> </xsl:text>
  </xsl:template>

  <!-- Segment text is copied through unchanged. -->
  <xsl:template match="text()" mode="inner">
    <xsl:value-of select="."/>
  </xsl:template>

  <!-- Dropped from segments: nested elements that are themselves one side
       of a @corresp link (they yield their own <tu>), plus tei:page and
       tei:note. -->
  <xsl:template match="*[@corresp] |
                       *[key('corresp', concat('#', @xml:id))] |
                       tei:page |
                       tei:note"
                mode="inner"/>

  <!-- These elements are padded with a space on both sides so that their
       text does not run into the neighbouring words. -->
  <xsl:template match="tei:pubPlace |
                       tei:date |
                       tei:lb |
                       tei:name |
                       tei:byline |
                       tei:docAuthor"
                mode="inner">
    <xsl:text> </xsl:text>
    <xsl:apply-templates select="* | text()" mode="inner"/>
    <xsl:text> </xsl:text>
  </xsl:template>

  <!-- Default-mode elements: descend into child elements only.  Like the
       default-mode text() rule, this is not reached by the templates in
       this stylesheet. -->
  <xsl:template match="*">
    <xsl:apply-templates select="*"/>
  </xsl:template>
</xsl:stylesheet>
Personal tools