IDREFs2bareNames.xslt

From TEIWiki
Jump to navigation Jump to search

This little XSLT 1.0 stylesheet turns the values of IDREF and IDREFS attributes into bare name fragment identifiers, i.e. URI pointers into the base document.

Known bugs: the stylesheet simply presumes that the base document (as defined by the xml:base= attribute) is the current document. If it is not, you will probably end up with syntactically valid pointers that point nowhere. :-(

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <!-- This stylesheet reads in a TEI P4:2004 XML file and writes out -->
  <!-- the same file with a "#" stuck in front of every token of every -->
  <!-- IDREF or IDREFS attribute value. This turns each token into a -->
  <!-- bare name URI fragment identifier, and thus into a P5 local pointer. -->
  
  <!-- Copyleft 2006 Text Encoding Initiative Consortium -->

  <!-- Although this file was created by Syd Bauman, the -->
  <!-- algorithm is taken directly from Sebastian Rahtz's work in this -->
  <!-- area, and thus he deserves credit for the hard part. -->

  <!-- The terms "hash mark", "pound sign", and "number sign" are used -->
  <!-- interchangeably herein to name "#". :-) -->
  
  <!-- Copy over all the non-IDREF[S] parts of the input file -->
  <xsl:import href="Copy-All.xsl"/>

  <!-- Match an IDREF or IDREFS attribute. -->
  <!-- Note that we treat them the same, even -->
  <!-- though obviously a simpler, faster algorithm -->
  <!-- could be used for IDREF attributes. -->
  <!-- This list was created by editing the DTD created -->
  <!-- when the Pizza Chef is asked to bake all bases and -->
  <!-- all toppings at once, and then deleting */@lang, -->
  <!-- which needs to be handled separately, as in P5 it is -->
  <!-- no longer a pointer, but rather a key. -->
  <xsl:template
    match="
      */@grpPtr             | gram/@location        | div7/@decls
    | */@depPtr             | gram/@mergedin        | front/@decls
    | */@corresp            | gen/@location         | back/@decls
    | */@synch              | gen/@mergedin         | link/@targets
    | */@sameAs             | number/@location      | linkGrp/@domains
    | */@copyOf             | number/@mergedin      | when/@since
    | */@next               | case/@location        | timeline/@origin
    | */@prev               | case/@mergedin        | join/@targets
    | */@exclude            | per/@location         | joinGrp/@domains
    | */@select             | per/@mergedin         | alt/@targets
    | */@ana                | tns/@location         | altGrp/@domains
    | tagUsage/@render      | tns/@mergedin         | span/@inst
    | keywords/@scheme      | mood/@location        | span/@from
    | classCode/@scheme     | mood/@mergedin        | span/@to
    | catRef/@target        | itype/@location       | spanGrp/@inst
    | catRef/@scheme        | itype/@mergedin       | interp/@inst
    | gloss/@target         | gramGrp/@location     | interpGrp/@inst
    | abbr/@resp            | gramGrp/@mergedin     | fs/@feats
    | expan/@resp           | pos/@location         | f/@fVal
    | gap/@resp             | pos/@mergedin         | certainty/@target
    | gap/@hand             | subc/@location        | respons/@target
    | add/@resp             | subc/@mergedin        | addSpan/@resp
    | add/@hand             | colloc/@location      | addSpan/@hand
    | del/@resp             | colloc/@mergedin      | addSpan/@to
    | del/@hand             | def/@location         | delSpan/@resp
    | unclear/@hand         | def/@mergedin         | delSpan/@hand
    | ptr/@target           | trans/@location       | delSpan/@to
    | ref/@target           | trans/@mergedin       | restore/@resp
    | note/@target          | tr/@location          | restore/@hand
    | note/@targetEnd       | tr/@mergedin          | supplied/@hand
    | sp/@who               | etym/@location        | handShift/@new
    | move/@who             | etym/@mergedin        | handShift/@old
    | move/@perf            | lang/@location        | handShift/@resp
    | tech/@perf            | lang/@mergedin        | damage/@resp
    | u/@start              | eg/@location          | damage/@hand
    | u/@end                | eg/@mergedin          | app/@from
    | u/@decls              | usg/@location         | app/@to
    | u/@who                | usg/@mergedin         | lem/@hand
    | pause/@start          | lbl/@location         | rdg/@hand
    | pause/@end            | lbl/@mergedin         | rdgGrp/@hand
    | pause/@who            | xr/@location          | witDetail/@target
    | vocal/@start          | xr/@mergedin          | node/@value
    | vocal/@end            | re/@location          | node/@adjTo
    | vocal/@who            | re/@mergedin          | node/@adjFrom
    | kinesic/@start        | oRef/@target          | node/@adj
    | kinesic/@end          | oRef/@location        | arc/@from
    | kinesic/@who          | oRef/@mergedin        | arc/@to
    | event/@start          | oVar/@target          | root/@value
    | event/@end            | oVar/@location        | root/@children
    | event/@who            | oVar/@mergedin        | iNode/@value
    | writing/@who          | pRef/@target          | iNode/@children
    | writing/@script       | pRef/@location        | iNode/@parent
    | shift/@who            | pRef/@mergedin        | iNode/@follow
    | entryFree/@location   | pVar/@target          | leaf/@value
    | entryFree/@mergedin   | pVar/@location        | leaf/@parent
    | hom/@location         | pVar/@mergedin        | leaf/@follow
    | hom/@mergedin         | gram/@location        | eTree/@value
    | sense/@location       | gram/@mergedin        | triangle/@value
    | sense/@mergedin       | text/@decls           | eLeaf/@value
    | form/@location        | body/@decls           | occupation/@scheme
    | form/@mergedin        | group/@decls          | occupation/@code
    | orth/@location        | div/@decls            | socecStatus/@scheme
    | orth/@mergedin        | div0/@decls           | socecStatus/@code
    | pron/@location        | div1/@decls           | relation/@active
    | pron/@mergedin        | div2/@decls           | relation/@passive
    | hyph/@location        | div3/@decls           | setting/@who
    | hyph/@mergedin        | div4/@decls
    | syll/@location        | div5/@decls
    | syll/@mergedin        | div6/@decls
    ">
    <!-- The pattern above reads as three snaked columns of -->
    <!-- element/@attribute alternatives. gram/@location and -->
    <!-- gram/@mergedin are listed twice; repeated alternatives in -->
    <!-- a union pattern are harmless. -->

    <!-- Re-create the matched attribute under its own name. Its -->
    <!-- new value is produced by the tokenize-prepending-pound -->
    <!-- template, which is handed the original value with its -->
    <!-- whitespace collapsed to single blanks, so that it can -->
    <!-- split the list on single spaces only. -->
    <xsl:attribute name="{name()}">
      <xsl:call-template name="tokenize-prepending-pound">
        <xsl:with-param name="value" select="normalize-space()"/>
      </xsl:call-template>
    </xsl:attribute>
  </xsl:template>

  <!-- Tokenize the string passed as the 'value' parameter -->
  <!-- using blank as the inter token delimiter, and return -->
  <!-- the same with a "#" inserted directly in front of each -->
  <!-- token. -->
  <!-- This is done with a clever recursive algorithm that -->
  <!-- pops off the first token, sticks a "#" in front, and -->
  <!-- then calls self on the remaining string. -->
  <xsl:template name="tokenize-prepending-pound">
    <!-- Parameter 'value': a whitespace-delimited list of tokens -->
    <!-- (the value of an IDREF or IDREFS attribute). -->
    <!-- Output: the same tokens, each preceded by a "#" and -->
    <!-- separated from the next by a single blank. An empty or -->
    <!-- whitespace-only 'value' yields no output at all, rather -->
    <!-- than a lone hash mark. -->
    <xsl:param name="value"/>
    <!-- Collapse whitespace here as well, so that the result is -->
    <!-- correct even if a caller has not already normalized the -->
    <!-- string; for an already normalized string this is a no-op. -->
    <xsl:variable name="tokens" select="normalize-space($value)"/>
    <xsl:choose>
      <xsl:when test="$tokens = ''">
        <!-- No tokens at all: emit nothing, so that an empty -->
        <!-- attribute value stays empty. -->
      </xsl:when>
      <xsl:when test="contains($tokens, ' ')">
        <!-- At least two tokens: emit a hash mark, the first -->
        <!-- token, and a separating blank ... -->
        <xsl:text>#</xsl:text>
        <xsl:value-of select="substring-before($tokens, ' ')"/>
        <xsl:text> </xsl:text>
        <!-- ... then recurse on whatever follows the first blank. -->
        <xsl:call-template name="tokenize-prepending-pound">
          <xsl:with-param name="value" select="substring-after($tokens, ' ')"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <!-- Exactly one token left: emit it behind a hash mark; -->
        <!-- this ends the recursion. -->
        <xsl:text>#</xsl:text>
        <xsl:value-of select="$tokens"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>

<!-- 
  Note to self
  ==== == ====
  * Make sure file that lists items is stored on Mac w/ Mac linends
  * rmac
  * pipe < IN | pad r 24 | snake 3 | console | pad r 120 | > OUT f 120
  * will need to twiddle the pad and snake values to get nice results
  * wmac
  * trim R padding
-->