Difference between revisions of "IDREFs2bareNames.xslt"

From TEIWiki
Jump to navigation Jump to search
m (add to P4toP5 category)
m (remove blank before XML declaration)
Line 6: Line 6:
 
probably end up with valid pointers that point nowhere. :-(
 
probably end up with valid pointers that point nowhere. :-(
  
<pre><nowiki>
+
<pre><nowiki><?xml version="1.0" encoding="UTF-8"?>
<?xml version="1.0" encoding="UTF-8"?>
 
 
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
 
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
  

Revision as of 02:51, 6 October 2006

This little XSLT 1.0 stylesheet turns the values of IDREF and IDREFS attributes into bare name fragment identifiers, i.e. URI pointers into the base document.

Known bugs: we just presume that the base document (as defined by the xml:base= attribute) is the current document. If it's not, you'll probably end up with valid pointers that point nowhere. :-(

<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <!-- This stylesheet reads in a TEI P4:2004 XML file and writes out -->
  <!-- the same file with a "#" stuck in front of every IDREF attribute -->
  <!-- value, and in front of each token of every IDREFS attribute -->
  <!-- value. This turns each one into a bare name URI fragment -->
  <!-- identifier, and thus into a P5 local pointer. -->
  
  <!-- Copyleft 2006 Text Encoding Initiative Consortium -->

  <!-- Although this file was created by Syd Bauman, the -->
  <!-- algorithm is taken directly from Sebastian Rahtz's work in this -->
  <!-- area, and thus he deserves credit for the hard part. -->

  <!-- The terms "hash mark", "pound sign", and "number sign" are used -->
  <!-- interchangeably herein to name "#". :-) -->
  
  <!-- Copy over all the non-IDREF[S] parts of the input file -->
  <xsl:import href="Copy-All.xsl"/>

  <!-- Match an IDREF or IDREFS attribute. -->
  <!-- Note that we treat them the same, even -->
  <!-- though obviously a simpler, faster algorithm -->
  <!-- could be used for IDREF attributes. -->
  <!-- This list was created by editing the DTD created -->
  <!-- when the Pizza Chef is asked to bake all bases and -->
  <!-- all toppings at once, and then deleting */@lang, -->
  <!-- which needs to be handled separately, as in P5 it is -->
  <!-- no longer a pointer, but rather a key. -->
  <xsl:template
    match="
      */@grpPtr             | gram/@location        | div7/@decls
    | */@depPtr             | gram/@mergedin        | front/@decls
    | */@corresp            | gen/@location         | back/@decls
    | */@synch              | gen/@mergedin         | link/@targets
    | */@sameAs             | number/@location      | linkGrp/@domains
    | */@copyOf             | number/@mergedin      | when/@since
    | */@next               | case/@location        | timeline/@origin
    | */@prev               | case/@mergedin        | join/@targets
    | */@exclude            | per/@location         | joinGrp/@domains
    | */@select             | per/@mergedin         | alt/@targets
    | */@ana                | tns/@location         | altGrp/@domains
    | tagUsage/@render      | tns/@mergedin         | span/@inst
    | keywords/@scheme      | mood/@location        | span/@from
    | classCode/@scheme     | mood/@mergedin        | span/@to
    | catRef/@target        | itype/@location       | spanGrp/@inst
    | catRef/@scheme        | itype/@mergedin       | interp/@inst
    | gloss/@target         | gramGrp/@location     | interpGrp/@inst
    | abbr/@resp            | gramGrp/@mergedin     | fs/@feats
    | expan/@resp           | pos/@location         | f/@fVal
    | gap/@resp             | pos/@mergedin         | certainty/@target
    | gap/@hand             | subc/@location        | respons/@target
    | add/@resp             | subc/@mergedin        | addSpan/@resp
    | add/@hand             | colloc/@location      | addSpan/@hand
    | del/@resp             | colloc/@mergedin      | addSpan/@to
    | del/@hand             | def/@location         | delSpan/@resp
    | unclear/@hand         | def/@mergedin         | delSpan/@hand
    | ptr/@target           | trans/@location       | delSpan/@to
    | ref/@target           | trans/@mergedin       | restore/@resp
    | note/@target          | tr/@location          | restore/@hand
    | note/@targetEnd       | tr/@mergedin          | supplied/@hand
    | sp/@who               | etym/@location        | handShift/@new
    | move/@who             | etym/@mergedin        | handShift/@old
    | move/@perf            | lang/@location        | handShift/@resp
    | tech/@perf            | lang/@mergedin        | damage/@resp
    | u/@start              | eg/@location          | damage/@hand
    | u/@end                | eg/@mergedin          | app/@from
    | u/@decls              | usg/@location         | app/@to
    | u/@who                | usg/@mergedin         | lem/@hand
    | pause/@start          | lbl/@location         | rdg/@hand
    | pause/@end            | lbl/@mergedin         | rdgGrp/@hand
    | pause/@who            | xr/@location          | witDetail/@target
    | vocal/@start          | xr/@mergedin          | node/@value
    | vocal/@end            | re/@location          | node/@adjTo
    | vocal/@who            | re/@mergedin          | node/@adjFrom
    | kinesic/@start        | oRef/@target          | node/@adj
    | kinesic/@end          | oRef/@location        | arc/@from
    | kinesic/@who          | oRef/@mergedin        | arc/@to
    | event/@start          | oVar/@target          | root/@value
    | event/@end            | oVar/@location        | root/@children
    | event/@who            | oVar/@mergedin        | iNode/@value
    | writing/@who          | pRef/@target          | iNode/@children
    | writing/@script       | pRef/@location        | iNode/@parent
    | shift/@who            | pRef/@mergedin        | iNode/@follow
    | entryFree/@location   | pVar/@target          | leaf/@value
    | entryFree/@mergedin   | pVar/@location        | leaf/@parent
    | hom/@location         | pVar/@mergedin        | leaf/@follow
    | hom/@mergedin         | gram/@location        | eTree/@value
    | sense/@location       | gram/@mergedin        | triangle/@value
    | sense/@mergedin       | text/@decls           | eLeaf/@value
    | form/@location        | body/@decls           | occupation/@scheme
    | form/@mergedin        | group/@decls          | occupation/@code
    | orth/@location        | div/@decls            | socecStatus/@scheme
    | orth/@mergedin        | div0/@decls           | socecStatus/@code
    | pron/@location        | div1/@decls           | relation/@active
    | pron/@mergedin        | div2/@decls           | relation/@passive
    | hyph/@location        | div3/@decls           | setting/@who
    | hyph/@mergedin        | div4/@decls
    | syll/@location        | div5/@decls
    | syll/@mergedin        | div6/@decls
    ">
    <!-- The pattern above is read column by column (three columns). -->
    <!-- NOTE(review): gram/@location and gram/@mergedin are listed -->
    <!-- twice in the middle column. Repeating an alternative in a -->
    <!-- union pattern is harmless, but confirm against the DTD that -->
    <!-- no other element was meant in the second position. -->

    <!-- Re-create the matched attribute under its original name. -->
    <xsl:attribute name="{name()}">
      <!-- Its new value is whatever the named template below builds -->
      <!-- from the old value. Whitespace is collapsed to single -->
      <!-- blanks first, because the named template splits on a -->
      <!-- single blank. -->
      <xsl:call-template name="tokenize-prepending-pound">
        <xsl:with-param name="value" select="normalize-space(.)"/>
      </xsl:call-template>
    </xsl:attribute>
  </xsl:template>

  <!-- Tokenize the string passed as the 'value' parameter -->
  <!-- using blank as the inter-token delimiter, and return -->
  <!-- the same with a "#" inserted directly in front of each -->
  <!-- token. -->
  <!-- This is done with a clever recursive algorithm that -->
  <!-- pops off the first token, sticks a "#" in front, and -->
  <!-- then calls self on the remaining string. -->
  <xsl:template name="tokenize-prepending-pound">
    <!-- Parameter 'value': a whitespace-separated list of tokens -->
    <!-- (the old IDREF or IDREFS attribute value). Defaults to the -->
    <!-- empty string when the caller passes nothing. -->
    <!-- Output: the same tokens, each prefixed with a hash mark and -->
    <!-- separated by single blanks. An empty or all-whitespace -->
    <!-- 'value' produces no output at all, rather than a lone "#" -->
    <!-- that would point at the whole document. -->
    <xsl:param name="value"/>
    <!-- Collapse whitespace here as well, so the result is correct -->
    <!-- even if a caller forgets to normalize: leading or doubled -->
    <!-- blanks would otherwise yield stray "#" tokens. For input -->
    <!-- that is already normalized this is a no-op. -->
    <xsl:variable name="tokens" select="normalize-space($value)"/>
    <xsl:choose>
      <xsl:when test="$tokens = ''">
        <!-- nothing to point at: emit nothing, which also ends -->
        <!-- the recursion -->
      </xsl:when>
      <xsl:when test="contains($tokens, ' ')">
        <!-- at least two tokens: emit a hash mark, the first -->
        <!-- token, and a separating blank ... -->
        <xsl:text>#</xsl:text>
        <xsl:value-of select="substring-before($tokens, ' ')"/>
        <xsl:text> </xsl:text>
        <!-- ... then recurse on everything after the first blank -->
        <xsl:call-template name="tokenize-prepending-pound">
          <xsl:with-param name="value" select="substring-after($tokens, ' ')"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <!-- exactly one token left: emit it behind a hash mark -->
        <xsl:text>#</xsl:text>
        <xsl:value-of select="$tokens"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>

<!-- 
  Note to self
  ==== == ====
  * Make sure file that lists items is stored on Mac w/ Mac line ends
  * rmac
  * pipe < IN | pad r 24 | snake 3 | console | pad r 120 | > OUT f 120
  * will need to twiddle the pad and snake values to get nice results
  * wmac
  * trim R padding
-->