MilestoneXSLT

From TEIWiki
Jump to navigation Jump to search

Authorship

Author Raffaele Viglianti and Elena Pierazzo, King’s College London, raffaele.viglianti@kcl.ac.uk elena.pierazzo@kcl.ac.uk
Last revised 2012-06-21
Previous version 2008-06-23


Summary

This is a group of XSLT scripts for managing milestones (page breaks, line breaks, hand shifts, etc.) in XSLT 2.0.

These scripts originated from a poster presented at DH2008 by Elena Pierazzo and Raffaele Viglianti: XSLT (2.0) handbook for processing multiple hierarchies. An older version presented after the more efficient new version can also work with XSLT 1.0 Three different approaches are presented, ordered by complexity of the input and of the script as well.

Required Input

Script 1: expanding milestones

<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <!-- header removed for brevity -->
  <!-- Sample input for Script 1: a paragraph whose two lines are separated by an lb milestone.
       The ampersand is written as an entity reference so that the sample is well-formed. -->
  <text>
    <body>
    <p>
      Give me then your house &amp; <w>Grounds</w>
       <lb/>
      I ask for nothing else 
    </p>
     </body>
  </text>
</TEI>

Script 2: splitting elements, then expanding milestones

<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <!-- header removed for brevity -->
  <!-- Sample input for Script 2: the lb milestone sits inside a del element,
       so the deletion spans the line break. -->
  <text>
    <body>
    <p>
      <del rend="overstrike">Card room where
         <lb/>
      nine out of ten had no inclination</del>
    </p>
     </body>
  </text>
</TEI>

Script 3: looping on textual nodes

<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <!-- header removed for brevity -->
  <!-- Sample input for Script 3: each paragraph opens with an empty handShift milestone
       whose new attribute points at a hand (#a1, #a2); a pb milestone precedes the first div. -->
  <text>
    <body>
<pb xml:id="blvolthird-03"/>
     <div>
       <p><handShift new="#a1"/>
For James Edward Austen</p>
     </div>
     <div>
      <p><handShift new="#a2"/>
        Jane Austen May 6<hi rend="sup">th</hi> 1792</p>
     </div>
     </body>
  </text>
</TEI>


Expected Output

Script 1

the function would produce the following XML fragment as output:

<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <!-- Expected result of Script 1: every line of the paragraph is wrapped in its own lb element.
       The ampersand is written as an entity reference so that the sample is well-formed. -->
  <text>
    <body>
    <p>
       <lb>Give me then your house &amp; <w>Grounds</w></lb>
      <lb>I ask for nothing else</lb>
    </p>
     </body>
  </text>
</TEI>

Script 2

<TEI xmlns="http://www.tei-c.org/ns/1.0">
  <!-- Expected result of Script 2: each line is wrapped in lb, and the del element
       (with its rend attribute) is repeated inside every line it covered. -->
  <text>
    <body>
    <p>
      <lb>
        <del rend="overstrike">Card room where</del>
      </lb><lb>
        <del rend="overstrike">nine out of ten had no inclination</del>
      </lb>
    </p>
     </body>
  </text>
</TEI>

Script 3

<html xmlns="http://www.w3.org/1999/xhtml">
  <!-- Expected result of Script 3: every run of text is wrapped in a span whose class
       names the hand in force at that point; the page break becomes a div of class pb. -->
  <body>
     <div class="pb">blvolthird-03</div>
     <div>
       <p><span class="pencil">For James Edward Austen</span></p>
     </div>
     <div>
      <p>
        <span class="default">Jane Austen May 6</span><sup><span class="default">th</span></sup> <span class="default">1792</span></p>
     </div>
     </body>
</html>

Code (XSLT 2.0 only)

Script 1

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
    xpath-default-namespace="http://www.tei-c.org/ns/1.0"
    exclude-result-prefixes="#all">
    <!--
        Script 1: turns lb milestones into containers.
        Everything is copied unchanged except p, whose children are grouped on lb;
        each group is wrapped in an lb element.
        exclude-result-prefixes is declared on xsl:stylesheet because xsl:output
        does not accept that attribute.
    -->
    <xsl:output method="xml" indent="yes" encoding="UTF-8"/>

    <!-- Identity rule: copy every node and attribute that has no more specific rule. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
    </xsl:template>

    <!-- Rebuild p with one lb wrapper per line. -->
    <xsl:template match="p">
        <p xmlns="http://www.tei-c.org/ns/1.0">
            <!-- Keep the attributes of the paragraph itself. -->
            <xsl:apply-templates select="@*"/>
            <xsl:for-each-group select="node()" group-starting-with="lb">
                <lb>
                    <!-- Only the milestone that opens this group contributes attributes;
                         the first group has none when the paragraph does not start with lb. -->
                    <xsl:copy-of select="current-group()[1][self::lb]/@*"/>
                    <!-- The content of the line, without the milestone itself. -->
                    <xsl:apply-templates select="current-group()[not(self::lb)]"/>
                </lb>
            </xsl:for-each-group>
        </p>
    </xsl:template>
</xsl:stylesheet>

Script 2

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
    xpath-default-namespace="http://www.tei-c.org/ns/1.0"
    exclude-result-prefixes="#all">
    <!--
        Script 2: two passes held in global variables.
        step1: a del that directly contains lb is split into one del per line and the
               lb milestones are moved out between the pieces.
        step2: the children of each p are grouped on lb and every group is wrapped in lb.
        exclude-result-prefixes is declared on xsl:stylesheet because xsl:output
        does not accept that attribute.
    -->
    <xsl:output method="xml" indent="yes" encoding="UTF-8"/>
    <xsl:strip-space elements="*"/>

    <!-- Entry point of the first pass; evaluated with the source document as context. -->
    <xsl:template name="start">
        <xsl:apply-templates select="TEI" mode="step1"/>
    </xsl:template>

    <!-- Result of the first pass (del elements split around lb). -->
    <xsl:variable name="step1">
        <xsl:call-template name="start"/>
    </xsl:variable>

    <!-- Result of the second pass (lines wrapped in lb). -->
    <xsl:variable name="step2">
        <xsl:apply-templates select="$step1" mode="step2"/>
    </xsl:variable>

    <xsl:template match="/">
        <!-- output -->
        <xsl:sequence select="$step2"/>
    </xsl:template>

    <!-- step1 identity rule -->
    <xsl:template match="@*|node()" mode="step1">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" mode="step1"/>
        </xsl:copy>
    </xsl:template>

    <!-- step1: split a del around each lb it directly contains. -->
    <xsl:template match="del" mode="step1">
        <!-- Remember the del being split: inside the grouping the context item
             is a child of it, not the del itself. -->
        <xsl:variable name="this-del" select="."/>
        <xsl:choose>
            <xsl:when test="lb">
                <xsl:for-each-group select="node()" group-starting-with="lb">
                    <!-- Copies the only lb in the group first (N.B. the first group does not contain lb) -->
                    <xsl:sequence select="current-group()[self::lb]"/>
                    <!-- Skip groups that hold nothing but the milestone, so that no empty del is created. -->
                    <xsl:if test="current-group()[not(self::lb)]">
                        <del xmlns="http://www.tei-c.org/ns/1.0">
                            <!-- Every piece carries the attributes of the original del. -->
                            <xsl:sequence select="$this-del/@*"/>
                            <!-- Copies the elements in the group except lb -->
                            <xsl:sequence select="current-group()[not(self::lb)]"/>
                        </del>
                    </xsl:if>
                </xsl:for-each-group>
            </xsl:when>
            <xsl:otherwise>
                <xsl:sequence select="."/>
            </xsl:otherwise>
        </xsl:choose>
    </xsl:template>

    <!-- step2 identity rule -->
    <xsl:template match="@*|node()" mode="step2">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" mode="step2"/>
        </xsl:copy>
    </xsl:template>

    <!-- step2: wrap every line of a paragraph in lb. -->
    <xsl:template match="p" mode="step2">
        <p xmlns="http://www.tei-c.org/ns/1.0">
            <!-- Keep the attributes of the paragraph itself. -->
            <xsl:apply-templates select="@*" mode="step2"/>
            <xsl:choose>
                <xsl:when test="descendant::lb">
                    <xsl:for-each-group select="node()" group-starting-with="lb">
                        <lb>
                            <!-- Only the milestone that opens this group contributes attributes. -->
                            <xsl:copy-of select="current-group()[1][self::lb]/@*"/>
                            <xsl:apply-templates select="current-group()[not(self::lb)]"
                                mode="step2"/>
                        </lb>
                    </xsl:for-each-group>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates mode="step2"/>
                </xsl:otherwise>
            </xsl:choose>
        </p>
    </xsl:template>

    <!-- step2: rebuild del, keeping its attributes (for example rend). -->
    <xsl:template match="del" mode="step2">
        <del xmlns="http://www.tei-c.org/ns/1.0">
            <xsl:apply-templates select="@*|node()" mode="step2"/>
        </del>
    </xsl:template>

</xsl:stylesheet>

Script 3

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
    xpath-default-namespace="http://www.tei-c.org/ns/1.0">
    <!--
        Script 3: two passes held in global variables.
        step1: every text node outside teiHeader is wrapped in a handShift element that
               carries the new attribute of the nearest preceding handShift milestone;
               the empty milestones themselves are dropped.
        step2: the intermediate tree is rendered as XHTML.
        doctype-system is given as well because the serializer ignores doctype-public
        when no system identifier is supplied.
    -->
    <xsl:output method="xhtml" indent="yes" encoding="UTF-8"
        doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
        doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"/>
    <xsl:strip-space elements="*"/>

    <!-- Entry point of the first pass; evaluated with the source document as context. -->
    <xsl:template name="start">
        <xsl:apply-templates select="TEI" mode="step1"/>
    </xsl:template>

    <!-- Result of the first pass (text wrapped in handShift). -->
    <xsl:variable name="step1">
        <xsl:call-template name="start"/>
    </xsl:variable>

    <!-- Result of the second pass (XHTML). -->
    <xsl:variable name="step2">
        <xsl:apply-templates select="$step1" mode="step2"/>
    </xsl:variable>

    <xsl:template match="/">
        <!-- output -->
        <xsl:sequence select="$step2"/>
    </xsl:template>

    <!-- step1 identity rule -->
    <xsl:template match="@*|node()" mode="step1">
        <xsl:copy>
            <xsl:apply-templates select="@*|node()" mode="step1"/>
        </xsl:copy>
    </xsl:template>

    <!-- step1: wrap each text node in a handShift that records the hand in force. -->
    <xsl:template match="text()[not(ancestor::teiHeader)]" mode="step1">
        <handShift xmlns="http://www.tei-c.org/ns/1.0">
            <!-- No attribute is copied for text that precedes the first milestone. -->
            <xsl:sequence select="preceding::handShift[1]/@new"/>
            <xsl:value-of select="."/>
        </handShift>
    </xsl:template>

    <!-- step1: the original empty milestones are no longer needed. -->
    <xsl:template match="handShift" mode="step1"/>

    <!-- step2: XHTML skeleton. -->
    <xsl:template match="/" mode="step2">
        <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
                <title/>
            </head>
            <body>
                <xsl:apply-templates mode="step2"/>
            </body>
        </html>
    </xsl:template>

    <!-- step2: keep header text out of the page body; without this rule the
         built-in rules would output it as plain text. -->
    <xsl:template match="teiHeader" mode="step2"/>

    <!-- step2: a page break becomes a div that shows the identifier of the page. -->
    <xsl:template match="pb" mode="step2">
        <div xmlns="http://www.w3.org/1999/xhtml" class="pb">
            <xsl:value-of select="@xml:id"/>
        </div>
    </xsl:template>

    <!-- step2: text divisions map to XHTML div. -->
    <xsl:template match="text//div" mode="step2">
        <div xmlns="http://www.w3.org/1999/xhtml">
            <xsl:apply-templates mode="step2"/>
        </div>
    </xsl:template>

    <!-- step2: paragraphs; when line breaks are present each line gets its own span. -->
    <xsl:template match="text//p" mode="step2">
        <p xmlns="http://www.w3.org/1999/xhtml">
            <xsl:choose>
                <xsl:when test="descendant::lb">
                    <xsl:for-each-group select="node()" group-starting-with="lb">
                        <!-- XHTML has no lb element, so a line is rendered as a span. -->
                        <span class="line">
                            <xsl:apply-templates select="current-group()[not(self::lb)]"
                                mode="step2"/>
                        </span>
                    </xsl:for-each-group>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates mode="step2"/>
                </xsl:otherwise>
            </xsl:choose>
        </p>
    </xsl:template>

    <!-- step2: superscript highlighting. -->
    <xsl:template match="hi[@rend='sup']" mode="step2">
        <sup xmlns="http://www.w3.org/1999/xhtml">
            <xsl:apply-templates mode="step2"/>
        </sup>
    </xsl:template>

    <!-- step2: a run of text in one hand becomes a span; the class names the hand.
         The class is left empty for any hand other than #a1 and #a2. -->
    <xsl:template match="handShift" mode="step2">
        <span xmlns="http://www.w3.org/1999/xhtml">
            <xsl:attribute name="class">
                <xsl:choose>
                    <xsl:when test="@new='#a2'">
                        <xsl:text>default</xsl:text>
                    </xsl:when>
                    <xsl:when test="@new='#a1'">
                        <xsl:text>pencil</xsl:text>
                    </xsl:when>
                </xsl:choose>
            </xsl:attribute>
            <xsl:apply-templates mode="step2"/>
        </span>
    </xsl:template>

</xsl:stylesheet>


XSLT 1.0

The code shown below avoids using xsl:for-each-group, which is exclusive of XSLT 2.0. It is still presented in XSLT 2.0 for brevity, as it allows to keep all steps in one script. With XSLT 1 it is possible to emulate each step by using more than one script and by producing intermediate files.


Using xsl:for-each-group and XSLT 2.0 is considerably faster and more efficient and, therefore, recommended.

Script 1 (compatible with XSLT 1.0)

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0" 
    xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns:tei="http://www.tei-c.org/ns/1.0">
    <!--
        Script 1 without xsl:for-each-group.
        Lines are found with sibling axes: the content of a line is every following
        sibling of an lb whose nearest preceding lb is that same lb. This works for
        any number of lb milestones in a paragraph.
    -->
    <xsl:output method="xml" indent="yes" encoding="UTF-8"/>

    <!-- Identity rule: copy every node and attribute that has no more specific rule. -->
    <xsl:template match="node()|@*">
        <xsl:copy>
            <xsl:apply-templates select="node()|@*"/>
        </xsl:copy>
    </xsl:template>

    <!-- Rebuild p with one lb wrapper per line. -->
    <xsl:template match="p">
        <!-- xsl:copy keeps the paragraph in the namespace of the source element. -->
        <xsl:copy>
            <xsl:apply-templates select="@*"/>
            <xsl:choose>
                <xsl:when test="lb">
                    <!-- First line: whatever stands before the first milestone. -->
                    <xsl:if test="lb[1]/preceding-sibling::node()">
                        <lb xmlns="http://www.tei-c.org/ns/1.0">
                            <xsl:apply-templates select="lb[1]/preceding-sibling::node()"/>
                        </lb>
                    </xsl:if>
                    <!-- One further line per milestone. -->
                    <xsl:for-each select="lb">
                        <xsl:variable name="cur-lb" select="generate-id(.)"/>
                        <lb xmlns="http://www.tei-c.org/ns/1.0">
                            <!-- The wrapper inherits the attributes of its own milestone. -->
                            <xsl:apply-templates select="@*"/>
                            <xsl:apply-templates
                                select="following-sibling::node()[not(self::lb)][generate-id(preceding-sibling::lb[1]) = $cur-lb]"/>
                        </lb>
                    </xsl:for-each>
                </xsl:when>
                <xsl:otherwise>
                    <!-- No milestone: process the content only, the paragraph is already open. -->
                    <xsl:apply-templates select="node()"/>
                </xsl:otherwise>
            </xsl:choose>
        </xsl:copy>
    </xsl:template>
</xsl:stylesheet>

Script 2 (Can be split into two XSLT 1.0 scripts)

<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
    xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns="http://www.tei-c.org/ns/1.0">
    <!--
        Script 2 without xsl:for-each-group; two passes held in global variables.
        step1: a del that directly contains lb is split into one del per line and the
               lb milestones are moved out between the pieces.
        step2: the intermediate tree is rendered as XHTML, one span of class line per line.
        The default namespace of the stylesheet is TEI (used by the elements built in
        step1); the XHTML elements built in step2 declare their own namespace.
        doctype-system is given as well because the serializer ignores doctype-public
        when no system identifier is supplied.
    -->
    <xsl:output method="xhtml" indent="yes" encoding="UTF-8"
        doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
        doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"/>
    <xsl:strip-space elements="*"/>

    <!-- Result of the first pass (del elements split around lb). -->
    <xsl:variable name="step1">
        <xsl:call-template name="one"/>
    </xsl:variable>

    <!-- Result of the second pass (XHTML). -->
    <xsl:variable name="step2">
        <xsl:apply-templates select="$step1" mode="step2"/>
    </xsl:variable>

    <!-- step1: copy elements with their attributes; text is copied by the built-in rule. -->
    <xsl:template match="*" mode="step1">
        <xsl:copy>
            <xsl:sequence select="@*"/>
            <xsl:apply-templates mode="step1"/>
        </xsl:copy>
    </xsl:template>

    <!-- step1: split a del around each lb it directly contains. -->
    <xsl:template match="del" mode="step1">
        <!-- Inside xsl:for-each the context is an lb, so keep a handle on the del. -->
        <xsl:variable name="this-del" select="."/>
        <xsl:choose>
            <xsl:when test="lb">
                <!-- Piece before the first milestone. -->
                <xsl:if test="lb[1]/preceding-sibling::node()">
                    <del>
                        <!-- NOTE(review): the original omitted the type attribute from the
                             first piece only; that is kept here. Confirm this is intended. -->
                        <xsl:sequence select="@*[name() != 'type']"/>
                        <!-- step1 mode keeps nested markup instead of flattening it to text. -->
                        <xsl:apply-templates select="lb[1]/preceding-sibling::node()"
                            mode="step1"/>
                    </del>
                </xsl:if>
                <xsl:for-each select="lb">
                    <xsl:variable name="cur-lb" select="generate-id(.)"/>
                    <!-- The milestone is re-created between the pieces. -->
                    <lb>
                        <xsl:sequence select="@*"/>
                    </lb>
                    <!-- Piece that follows this milestone, up to the next one. -->
                    <xsl:if
                        test="following-sibling::node()[not(self::lb)][generate-id(preceding-sibling::lb[1]) = $cur-lb]">
                        <del>
                            <xsl:sequence select="$this-del/@*"/>
                            <xsl:apply-templates
                                select="following-sibling::node()[not(self::lb)][generate-id(preceding-sibling::lb[1]) = $cur-lb]"
                                mode="step1"/>
                        </del>
                    </xsl:if>
                </xsl:for-each>
            </xsl:when>
            <xsl:otherwise>
                <xsl:sequence select="."/>
            </xsl:otherwise>
        </xsl:choose>

    </xsl:template>

    <!-- Entry point of the first pass; evaluated with the source document as context. -->
    <xsl:template name="one">
        <xsl:apply-templates select="TEI" mode="step1"/>
    </xsl:template>

    <!-- step2: XHTML skeleton. -->
    <xsl:template match="/" mode="step2">
        <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
                <title/>
            </head>
            <body>
                <xsl:apply-templates mode="step2"/>
            </body>
        </html>
    </xsl:template>

    <!-- step2: keep header text out of the page body; without this rule the
         built-in rules would output it as plain text. -->
    <xsl:template match="teiHeader" mode="step2"/>

    <!-- step2: paragraphs; when line breaks are present each line gets its own span. -->
    <xsl:template match="p" mode="step2">
        <p xmlns="http://www.w3.org/1999/xhtml">
            <xsl:choose>
                <xsl:when test="lb">
                    <!-- First line: whatever stands before the first milestone. -->
                    <xsl:if test="lb[1]/preceding-sibling::node()">
                        <span class="line">
                            <xsl:apply-templates select="lb[1]/preceding-sibling::node()"
                                mode="step2"/>
                        </span>
                    </xsl:if>
                    <!-- One further line per milestone; text nodes are included as well as elements. -->
                    <xsl:for-each select="lb">
                        <xsl:variable name="cur-lb" select="generate-id(.)"/>
                        <span class="line">
                            <xsl:apply-templates
                                select="following-sibling::node()[not(self::lb)][generate-id(preceding-sibling::lb[1]) = $cur-lb]"
                                mode="step2"/>
                        </span>
                    </xsl:for-each>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates mode="step2"/>
                </xsl:otherwise>
            </xsl:choose>
        </p>
    </xsl:template>

    <!-- step2: a TEI deletion becomes an XHTML del; TEI attributes are not carried over. -->
    <xsl:template match="del" mode="step2">
        <del xmlns="http://www.w3.org/1999/xhtml">
            <xsl:apply-templates mode="step2"/>
        </del>
    </xsl:template>

    <!-- OUTPUTTING -->

    <xsl:template match="/">
        <xsl:sequence select="$step2"/>
    </xsl:template>

</xsl:stylesheet>

Script 3 (Can be split into two XLST 1.0 scripts)

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0"
    xpath-default-namespace="http://www.tei-c.org/ns/1.0" xmlns="http://www.tei-c.org/ns/1.0">
    <!--
        Script 3 without xsl:for-each-group; two passes held in global variables.
        step1: every text node outside teiHeader is wrapped in a handShift element that
               carries the new attribute of the nearest preceding handShift milestone;
               the empty milestones themselves are dropped.
        step2: the intermediate tree is rendered as XHTML.
        The default namespace of the stylesheet is TEI (used by the handShift built in
        step1); the XHTML elements built in step2 declare their own namespace.
        doctype-system is given as well because the serializer ignores doctype-public
        when no system identifier is supplied.
    -->
    <xsl:output method="xhtml" indent="yes" encoding="UTF-8"
        doctype-public="-//W3C//DTD XHTML 1.0 Strict//EN"
        doctype-system="http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"/>
    <xsl:strip-space elements="*"/>

    <!-- Result of the first pass (text wrapped in handShift). -->
    <xsl:variable name="step1">
        <xsl:call-template name="one"/>
    </xsl:variable>

    <!-- Result of the second pass (XHTML). -->
    <xsl:variable name="step2">
        <xsl:apply-templates select="$step1" mode="step2"/>
    </xsl:variable>

    <!-- step1: copy elements with their attributes. -->
    <xsl:template match="*" mode="step1">
        <xsl:copy>
            <xsl:sequence select="@*"/>
            <xsl:apply-templates mode="step1"/>
        </xsl:copy>
    </xsl:template>

    <!-- step1: wrap each text node in a handShift that records the hand in force. -->
    <xsl:template match="text()[not(ancestor::teiHeader)]" mode="step1">
        <handShift>
            <!-- No attribute is copied for text that precedes the first milestone. -->
            <xsl:sequence select="preceding::handShift[1]/@new"/>
            <xsl:value-of select="."/>
        </handShift>
    </xsl:template>

    <!-- step1: the original empty milestones are no longer needed. -->
    <xsl:template match="handShift" mode="step1"/>

    <!-- Entry point of the first pass; evaluated with the source document as context. -->
    <xsl:template name="one">
        <xsl:apply-templates select="TEI" mode="step1"/>
    </xsl:template>

    <!-- step2: XHTML skeleton. -->
    <xsl:template match="/" mode="step2">
        <html xmlns="http://www.w3.org/1999/xhtml">
            <head>
                <title/>
            </head>
            <body>
                <xsl:apply-templates mode="step2"/>
            </body>
        </html>
    </xsl:template>

    <!-- step2: keep header text out of the page body; without this rule the
         built-in rules would output it as plain text. -->
    <xsl:template match="teiHeader" mode="step2"/>

    <!-- step2: a page break becomes a div that shows the identifier of the page. -->
    <xsl:template match="pb" mode="step2">
        <div xmlns="http://www.w3.org/1999/xhtml" class="pb">
            <xsl:value-of select="@xml:id"/>
        </div>
    </xsl:template>

    <!-- step2: text divisions map to XHTML div. -->
    <xsl:template match="text//div" mode="step2">
        <div xmlns="http://www.w3.org/1999/xhtml">
            <xsl:apply-templates mode="step2"/>
        </div>
    </xsl:template>

    <!-- step2: paragraphs; when line breaks are present each line gets its own span. -->
    <xsl:template match="p" mode="step2">
        <p xmlns="http://www.w3.org/1999/xhtml">
            <xsl:choose>
                <xsl:when test="lb">
                    <!-- First line: whatever stands before the first milestone. -->
                    <xsl:if test="lb[1]/preceding-sibling::node()">
                        <span class="line">
                            <xsl:apply-templates select="lb[1]/preceding-sibling::node()"
                                mode="step2"/>
                        </span>
                    </xsl:if>
                    <!-- One further line per milestone. Only children of the paragraph are
                         selected, so nested elements are not processed twice. -->
                    <xsl:for-each select="lb">
                        <xsl:variable name="cur-lb" select="generate-id(.)"/>
                        <span class="line">
                            <xsl:apply-templates
                                select="following-sibling::node()[not(self::lb)][generate-id(preceding-sibling::lb[1]) = $cur-lb]"
                                mode="step2"/>
                        </span>
                    </xsl:for-each>
                </xsl:when>
                <xsl:otherwise>
                    <xsl:apply-templates mode="step2"/>
                </xsl:otherwise>
            </xsl:choose>
        </p>
    </xsl:template>

    <!-- step2: superscript highlighting. -->
    <xsl:template match="hi[@rend='sup']" mode="step2">
        <sup xmlns="http://www.w3.org/1999/xhtml">
            <xsl:apply-templates mode="step2"/>
        </sup>
    </xsl:template>

    <!-- step2: a run of text in one hand becomes a span; the class names the hand.
         The class is left empty for any hand other than #a1 and #a2. -->
    <xsl:template match="handShift" mode="step2">
        <span xmlns="http://www.w3.org/1999/xhtml">
            <xsl:attribute name="class">
                <xsl:choose>
                    <xsl:when test="@new= '#a2'">
                        <xsl:text>default</xsl:text>
                    </xsl:when>
                    <xsl:when test="@new='#a1'">
                        <xsl:text>pencil</xsl:text>
                    </xsl:when>
                </xsl:choose>
            </xsl:attribute>
            <xsl:apply-templates mode="step2"/>
        </span>
    </xsl:template>
    <!-- OUTPUTTING -->

    <xsl:template match="/">
        <xsl:sequence select="$step2"/>
    </xsl:template>

</xsl:stylesheet>