TEI2TMX.xsl
Introduction
This is a stylesheet that converts TEI P5 parallel-text documents to TMX (Translation Memory eXchange) files. It assumes that div-level elements are linked to their translation in another language by @corresp attributes. The script has only been tested with bilingual documents, and I have no reason to believe that it will work with documents containing more than two languages.
See Also
- Translation Memory eXchange on Wikipedia
- Translation Memory eXchange spec in archive.org (includes DTD)
Stylesheet
<?xml version="1.0" encoding="UTF-8" ?>
<xsl:stylesheet version="1.0"
xmlns:tei="http://www.tei-c.org/ns/1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
exclude-result-prefixes="tei">
<!-- Serialise the result tree with UTF-8 encoding. -->
<xsl:output encoding="UTF-8" />
<!-- Key 'ids': finds any element by the value of its xml:id attribute. -->
<xsl:key name="ids" match="*[@xml:id]" use="@xml:id"/>
<!-- Key 'corresp': finds elements by the literal value of their corresp
     attribute; the stylesheet looks it up with '#' prepended to an xml:id. -->
<xsl:key name="corresp" match="*[@corresp]" use="@corresp"/>
<!--
  Entry point. Wraps the whole result in a TMX 1.4 document: a fixed header
  followed by a body that is filled by applying templates to every element
  in the input that carries a corresp attribute.
-->
<xsl:template match="/">
  <tmx version="1.4">
    <!-- datatype is "plaintext" (one of the values recommended by the TMX
         specification) because segments are produced from text only; "tbx"
         would name a different format, TermBase eXchange.
         NOTE(review): srclang is fixed to "en" whatever xml:lang values the
         input uses; confirm this suits the documents being converted. -->
    <header adminlang="en"
            creationtool="TEI to TMX converter"
            creationtoolversion="0.1"
            datatype="plaintext"
            o-tmf="unknown"
            segtype="block"
            srclang="en"/>
    <body>
      <xsl:apply-templates select="//*[@corresp]"/>
    </body>
  </tmx>
</xsl:template>
<!--
  Builds one translation unit (tu) for an element that carries a corresp
  attribute. The first tuv holds the text of the current element, the second
  the text of the element whose xml:id equals the corresp value with every
  '#' removed. Both segments are rendered in mode "inner".
-->
<xsl:template match="*[@corresp]">
  <!-- translate() deletes '#' so the remaining string can be used as an
       xml:id for the 'ids' key. -->
  <xsl:variable name="remote" select="key('ids', translate(@corresp, '#', ''))"/>
  <tu tuid="{@xml:id}">
    <!-- xml:lang applies to an element and everything inside it, so the
         language may be declared on an ancestor rather than on the linked
         element itself. ancestor-or-self is a reverse axis, hence [1] picks
         the nearest declaration (the element's own one when present). -->
    <tuv xml:lang="{ancestor-or-self::*[@xml:lang][1]/@xml:lang}">
      <seg>
        <xsl:apply-templates select="* | text()" mode="inner"/>
      </seg>
    </tuv>
    <tuv xml:lang="{$remote/ancestor-or-self::*[@xml:lang][1]/@xml:lang}">
      <seg>
        <xsl:apply-templates select="$remote/* | $remote/text()" mode="inner"/>
      </seg>
    </tuv>
  </tu>
</xsl:template>
<!-- Default mode: a text node is not copied; a single space is written in
     its place. -->
<xsl:template match="text()">
  <xsl:value-of select="' '"/>
</xsl:template>
<!-- Mode "inner": text nodes are passed through to the result unchanged. -->
<xsl:template match="text()" mode="inner">
  <xsl:copy/>
</xsl:template>
<!--
  Mode "inner": elements that produce no output inside a segment.
  Covers tei:note and tei:page, any element that has its own corresp
  attribute, and any element that some corresp attribute points at
  ('#' + xml:id found in the 'corresp' key).
  NOTE(review): tei:page is matched as written; confirm it is the element
  name the source documents actually use.
-->
<xsl:template mode="inner"
              match="tei:note |
                     tei:page |
                     *[key('corresp', concat('#', @xml:id))] |
                     *[@corresp]"/>
<!-- Mode "inner": the content of these elements is surrounded by one space
     on each side so that it does not run into the neighbouring text. -->
<xsl:template mode="inner"
              match="tei:docAuthor |
                     tei:byline |
                     tei:name |
                     tei:lb |
                     tei:date |
                     tei:pubPlace">
  <xsl:variable name="pad" select="' '"/>
  <xsl:value-of select="$pad"/>
  <xsl:apply-templates select="text() | *" mode="inner"/>
  <xsl:value-of select="$pad"/>
</xsl:template>
<!-- Default mode: an element writes nothing itself; processing continues
     with its child elements only (text children are not selected). -->
<xsl:template match="*">
  <xsl:apply-templates select="child::*"/>
</xsl:template>
</xsl:stylesheet>