<?xml version="1.0" encoding="UTF-8"?>
<!-- Tei scancan p4 to p5 (from TEIWiki) -->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xmlns="http://www.tei-c.org/ns/1.0">
 
 <!-- Copyleft December 2006 by Martin Holmes. 
     This is a small, specialized stylesheet which is customized for one specific project
     (the Scandinavian-Canadian Studies journal). It imports the generic Copy-All.xsl file, 
     but all other transformations are contained here. It's not expected to be generally
     useful, but if you have a simple project that needs a quick 
     conversion, this might serve as a starting point or a helpful example.
     
     Along with Copy-All.xsl, it does the whole job, including putting the result document 
     into the TEI namespace (by means of the xmlns attribute on the xsl:stylesheet root 
     element).
 -->
 
    <xsl:import href="Copy-All.xsl" />
    
    <xsl:output method="xml" encoding="UTF-8" />
    
    <!-- Stylesheet-wide variable (not an attribute) holding the fallback value that
        is written into an attribute which P5 requires but which the P4 XML may lack.
        It is read by the classCode / keywords template when no scheme is present. -->
    <xsl:variable name="DefAttVal" select="'scancan'" />
    
    <!-- Attributes that are dropped from the output by matching them with an
        empty template. Each alternative of a union pattern behaves as its own
        template rule, so grouping them does not change what is matched. -->

    <!-- Dropped regardless of their value. -->
    <xsl:template match="@TEIform
                       | @status
                       | @default
                       | teiHeader/@type" />

    <!-- Dropped only when they carry the specific value tested in the predicate;
        any other value is left to the remaining templates. -->
    <xsl:template match="@org[.='uniform']
                       | @sample[.='complete']
                       | @part[.='N']
                       | @direct[.='unspecified']
                       | @targOrder[.='U']
                       | @from[.='ROOT']
                       | @anchored[.='yes']
                       | @place[.='unspecified']" />
    
    <!-- Every id attribute is re-emitted as xml:id with the same value. -->
    <xsl:template match="@id">
        <xsl:attribute name="xml:id">
            <xsl:value-of select="string(.)" />
        </xsl:attribute>
    </xsl:template>
    
    <!-- Root element: rename the P4 TEI.2 element to TEI in the TEI P5 namespace and
        add the schema location and version attributes before processing the original
        attributes and child elements.
        No xmlns:xsi attribute is created with xsl:attribute: a namespace declaration
        is not an attribute in the XSLT data model, and "xmlns:xsi" is not a name that
        xsl:attribute may legally create (a processor either signals an error or drops
        it). The xsi prefix is bound on the xsl:stylesheet element, so creating
        xsi:schemaLocation is enough for the serializer to emit the matching
        namespace declaration on the output element. -->
    <xsl:template match="TEI.2">
        <xsl:element name="TEI" namespace="http://www.tei-c.org/ns/1.0">
            <!-- Add new attributes we need. -->
            <xsl:attribute name="xsi:schemaLocation">http://www.tei-c.org/ns/1.0 tei_scancan_p5.xsd</xsl:attribute>
            <xsl:attribute name="version">5.0</xsl:attribute>
            <!-- Attributes precede children in document order, so the original
                attributes are still added before any child content. -->
            <xsl:apply-templates select="* | @*" />
        </xsl:element>
    </xsl:template>
    
    <!-- Corpus root: rename the P4 teiCorpus.2 element to teiCorpus in the TEI P5
        namespace and add the schema location and version attributes before
        processing the original attributes and child elements.
        No xmlns:xsi attribute is created with xsl:attribute: a namespace declaration
        is not an attribute in the XSLT data model, and "xmlns:xsi" is not a name that
        xsl:attribute may legally create (a processor either signals an error or drops
        it). The xsi prefix is bound on the xsl:stylesheet element, so creating
        xsi:schemaLocation is enough for the serializer to emit the matching
        namespace declaration on the output element.
        The namespace is stated explicitly, as in the TEI.2 template; it is the same
        namespace the stylesheet's default xmlns declaration would supply. -->
    <xsl:template match="teiCorpus.2">
        <xsl:element name="teiCorpus" namespace="http://www.tei-c.org/ns/1.0">
            <xsl:attribute name="xsi:schemaLocation">http://www.tei-c.org/ns/1.0 tei_scancan_p5.xsd</xsl:attribute>
            <xsl:attribute name="version">5.0</xsl:attribute>
            <xsl:apply-templates select="* | @*" />
        </xsl:element>
    </xsl:template>
    
    <!-- Names and their regularized forms. -->
    <!-- The reg attribute itself is never written out as an attribute... -->
    <xsl:template match="@reg" />
    <!-- ...instead, a name element is rebuilt with its attributes, child elements
        and text, and the value of reg (when present) is appended as a trailing
        reg child element. -->
    <xsl:template match="name">
        <xsl:element name="name">
            <xsl:apply-templates select="@* | * | text()" />
            <!-- Runs at most once: an element has at most one reg attribute. -->
            <xsl:for-each select="@reg">
                <xsl:element name="reg">
                    <xsl:value-of select="." />
                </xsl:element>
            </xsl:for-each>
        </xsl:element>
    </xsl:template>
    
    <!-- An lb that is a direct child of respStmt produces no output. -->
    <xsl:template match="respStmt/lb" />
    
    <!-- classCode and keywords are rebuilt under their own name. When the source
        element has no scheme attribute, one is added with the value of $DefAttVal;
        an existing scheme is passed through with the other attributes. -->
    <xsl:template match="classCode | keywords">
        <xsl:element name="{name()}">
            <xsl:if test="not(@scheme)">
                <xsl:attribute name="scheme">
                    <xsl:value-of select="$DefAttVal" />
                </xsl:attribute>
            </xsl:if>
            <xsl:apply-templates select="@* | * | text()" />
        </xsl:element>
    </xsl:template>
    
    <!-- A q that is a direct child of cit is renamed to quote; its attributes,
        child elements and text are handed on to the other templates. -->
    <xsl:template match="cit/q">
        <xsl:element name="quote">
            <xsl:apply-templates select="@* | * | text()" />
        </xsl:element>
    </xsl:template>
    
    <!-- Special case, probably due to erroneous use of respStmt: we sometimes have 
        name elements embedded in resp elements. These need to be unravelled:
        the resp is written out without its name children, and the names follow
        it as siblings.
        xsl:value-of on a node-set outputs only the FIRST node, so selecting
        child::text() with it would lose any text that comes after an embedded
        name. All text nodes (plus attributes and any non-name child elements)
        are therefore processed with apply-templates instead. -->
    <xsl:template match="respStmt/resp[child::name]">
        <xsl:element name="resp">
            <xsl:apply-templates select="@* | *[not(self::name)] | text()" />
        </xsl:element>
        <xsl:apply-templates select="child::name" />
    </xsl:template>
    
    <!-- This is provisional, pending some feedback from TEI-L on how to encode email
        addresses in P5. -->
    <!-- Convert xptr elements for email addresses to ptr. -->
    <!-- First suppress the attributes we're going to harvest. -->
    <xsl:template match="xptr/@type | xptr/@to"></xsl:template>
    <!-- Every xptr becomes a ptr. When it is typed as an email and has a to
        attribute, a mailto: target is built from that address.
        The remaining attributes are selected explicitly with @*: node() on the
        child axis never selects attributes, so without it an id (and every other
        attribute) would be dropped and the suppression template above would
        never be used. -->
    <xsl:template match="xptr">
        <xsl:element name="ptr">
            <xsl:if test="(@type='email') and (@to)">
                <xsl:attribute name="target">mailto:<xsl:value-of select="@to" /></xsl:attribute>
            </xsl:if>
            <xsl:apply-templates select="@* | node()" />
        </xsl:element>
    </xsl:template>
    
    <!-- In P5, the value attribute of the date element has been tightened up. 
        This handles date ranges which are expressed in date/@value attributes, 
        as well as other unacceptable values such as "1963a".
        Both special cases are recognised by their actual shape, not only by their
        length, so that an unrelated 9- or 5-character value is not cut apart; any
        value that matches neither shape is copied through unchanged. -->
    <xsl:template match="date">
        <xsl:variable name="val" select="string(@value)" />
        <xsl:variable name="digits" select="'0123456789'" />
        <xsl:variable name="letters" select="'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'" />
        <xsl:element name="date">
            <xsl:choose>
                <!-- Date range YYYY-YYYY: nine characters, a hyphen in fifth position,
                    and nothing but digits once that single hyphen is set aside. -->
                <xsl:when test="string-length($val) = 9
                                and substring($val, 5, 1) = '-'
                                and translate($val, $digits, '') = '-'">
                    <xsl:attribute name="from"><xsl:value-of select="substring-before($val, '-')" /></xsl:attribute>
                    <xsl:attribute name="to"><xsl:value-of select="substring-after($val, '-')" /></xsl:attribute>
                </xsl:when>
                <!-- Biblio tags such as "1963a", "1963b": four digits followed by one
                    letter. Shave off the letter after the year, and put it into an
                    n attribute. -->
                <xsl:when test="string-length($val) = 5
                                and translate(substring($val, 1, 4), $digits, '') = ''
                                and translate(substring($val, 5), $letters, '') = ''">
                    <xsl:attribute name="value"><xsl:value-of select="substring($val, 1, 4)" /></xsl:attribute>
                    <xsl:attribute name="n"><xsl:value-of select="substring($val, 5)" /></xsl:attribute>
                </xsl:when>
                <xsl:otherwise>
                    <!-- Copies the value attribute as-is; copies nothing when absent. -->
                    <xsl:copy-of select="@value" />
                </xsl:otherwise>
            </xsl:choose>
            <!-- The value attribute was dealt with above; everything else goes
                through the normal templates. -->
            <xsl:apply-templates select="* | text() | @*[name() != 'value']" />
        </xsl:element>
    </xsl:template>
    
    <!-- Abbreviations and expansions. An abbr carrying an expan attribute becomes
        a choice element holding the abbr followed by an expan element whose
        content is the value of the old attribute.
        The abbr content is processed with apply-templates, not xsl:value-of:
        value-of on child::text() would output only the first text node, losing
        child elements and any text after them. The other attributes of the source
        abbr are kept on the new abbr; expan itself is excluded because it is
        turned into the expan element. -->
    <xsl:template match="abbr[@expan]">
        <xsl:element name="choice">
            <xsl:element name="abbr">
                <xsl:apply-templates select="@*[name() != 'expan'] | * | text()" />
            </xsl:element>
            <xsl:element name="expan"><xsl:value-of select="@expan" /></xsl:element>
        </xsl:element>
    </xsl:template>
    
</xsl:stylesheet>