IDREFs2bareNames.xslt
Revision as of 09:15, 14 March 2010 by Stuartyeates (talk | contribs)
This little XSLT 1.0 stylesheet turns the values of IDREF and IDREFS attributes into bare name fragment identifiers, i.e. URI pointers into the base document.
Known bugs: we just presume that the base document (as defined by the xml:base= attribute) is the current document. If it's not, you'll probably end up with valid pointers that point nowhere. :-(
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">

  <!-- This stylesheet reads in a TEI P4:2004 XML file and writes out  -->
  <!-- the same file with a "#" stuck in front of every IDREF attribute -->
  <!-- value. This turns it into a bare name URI fragment identifier,  -->
  <!-- and thus into a P5 local pointer.                               -->

  <!-- Copyleft 2006 Text Encoding Initiative Consortium -->

  <!-- Although this file was created by Syd Bauman, the               -->
  <!-- algorithm is taken directly from Sebastian Rahtz's work in this -->
  <!-- area, and thus he deserves credit for the hard part.            -->

  <!-- The terms "hash mark", "pound sign", and "number sign" are used -->
  <!-- interchangeably herein to name "#". :-)                         -->

  <!-- Copy over all the non-IDREF[S] parts of the input file.         -->
  <!-- The templates below take precedence over the imported ones.     -->
  <xsl:import href="Copy-All.xsl"/>

  <!-- Match an IDREF or IDREFS attribute.                             -->
  <!-- Note that we treat them the same, even though obviously a       -->
  <!-- simpler, faster algorithm could be used for IDREF attributes.   -->
  <!-- This list was created by editing the DTD created when the       -->
  <!-- Pizza Chef is asked to bake all bases and all toppings at once, -->
  <!-- and then deleting */@lang, which needs to be handled            -->
  <!-- separately, as in P5 it is no longer a pointer, but rather a    -->
  <!-- key.                                                            -->
  <!-- The entries are kept in their original order, three per line;   -->
  <!-- the second, redundant occurrences of gram/@location and         -->
  <!-- gram/@mergedin have been removed (a union matches the same      -->
  <!-- nodes with or without them).                                    -->
  <xsl:template match="
      */@grpPtr | gram/@location | div7/@decls |
      */@depPtr | gram/@mergedin | front/@decls |
      */@corresp | gen/@location | back/@decls |
      */@synch | gen/@mergedin | link/@targets |
      */@sameAs | number/@location | linkGrp/@domains |
      */@copyOf | number/@mergedin | when/@since |
      */@next | case/@location | timeline/@origin |
      */@prev | case/@mergedin | join/@targets |
      */@exclude | per/@location | joinGrp/@domains |
      */@select | per/@mergedin | alt/@targets |
      */@ana | tns/@location | altGrp/@domains |
      tagUsage/@render | tns/@mergedin | span/@inst |
      keywords/@scheme | mood/@location | span/@from |
      classCode/@scheme | mood/@mergedin | span/@to |
      catRef/@target | itype/@location | spanGrp/@inst |
      catRef/@scheme | itype/@mergedin | interp/@inst |
      gloss/@target | gramGrp/@location | interpGrp/@inst |
      abbr/@resp | gramGrp/@mergedin | fs/@feats |
      expan/@resp | pos/@location | f/@fVal |
      gap/@resp | pos/@mergedin | certainty/@target |
      gap/@hand | subc/@location | respons/@target |
      add/@resp | subc/@mergedin | addSpan/@resp |
      add/@hand | colloc/@location | addSpan/@hand |
      del/@resp | colloc/@mergedin | addSpan/@to |
      del/@hand | def/@location | delSpan/@resp |
      unclear/@hand | def/@mergedin | delSpan/@hand |
      ptr/@target | trans/@location | delSpan/@to |
      ref/@target | trans/@mergedin | restore/@resp |
      note/@target | tr/@location | restore/@hand |
      note/@targetEnd | tr/@mergedin | supplied/@hand |
      sp/@who | etym/@location | handShift/@new |
      move/@who | etym/@mergedin | handShift/@old |
      move/@perf | lang/@location | handShift/@resp |
      tech/@perf | lang/@mergedin | damage/@resp |
      u/@start | eg/@location | damage/@hand |
      u/@end | eg/@mergedin | app/@from |
      u/@decls | usg/@location | app/@to |
      u/@who | usg/@mergedin | lem/@hand |
      pause/@start | lbl/@location | rdg/@hand |
      pause/@end | lbl/@mergedin | rdgGrp/@hand |
      pause/@who | xr/@location | witDetail/@target |
      vocal/@start | xr/@mergedin | node/@value |
      vocal/@end | re/@location | node/@adjTo |
      vocal/@who | re/@mergedin | node/@adjFrom |
      kinesic/@start | oRef/@target | node/@adj |
      kinesic/@end | oRef/@location | arc/@from |
      kinesic/@who | oRef/@mergedin | arc/@to |
      event/@start | oVar/@target | root/@value |
      event/@end | oVar/@location | root/@children |
      event/@who | oVar/@mergedin | iNode/@value |
      writing/@who | pRef/@target | iNode/@children |
      writing/@script | pRef/@location | iNode/@parent |
      shift/@who | pRef/@mergedin | iNode/@follow |
      entryFree/@location | pVar/@target | leaf/@value |
      entryFree/@mergedin | pVar/@location | leaf/@parent |
      hom/@location | pVar/@mergedin | leaf/@follow |
      hom/@mergedin | eTree/@value |
      sense/@location | triangle/@value |
      sense/@mergedin | text/@decls | eLeaf/@value |
      form/@location | body/@decls | occupation/@scheme |
      form/@mergedin | group/@decls | occupation/@code |
      orth/@location | div/@decls | socecStatus/@scheme |
      orth/@mergedin | div0/@decls | socecStatus/@code |
      pron/@location | div1/@decls | relation/@active |
      pron/@mergedin | div2/@decls | relation/@passive |
      hyph/@location | div3/@decls | setting/@who |
      hyph/@mergedin | div4/@decls | syll/@location |
      div5/@decls | syll/@mergedin | div6/@decls
      ">
    <!-- We've matched an IDREF[S] attribute; output the same          -->
    <!-- attribute name ...                                            -->
    <xsl:attribute name="{name(.)}">
      <!-- ... with the value created by a sub-template ...            -->
      <xsl:call-template name="tokenize-prepending-pound">
        <!-- ... called with the matched attribute's value as a        -->
        <!-- parameter. Whitespace is regularized first, so that the   -->
        <!-- called template can chop it based on single spaces only.  -->
        <!-- Using select= passes a plain string rather than a result  -->
        <!-- tree fragment.                                            -->
        <xsl:with-param name="value" select="normalize-space(.)"/>
      </xsl:call-template>
    </xsl:attribute>
  </xsl:template>

  <!-- Tokenize the string passed as the 'value' parameter using a     -->
  <!-- single blank as the inter-token delimiter, and return the same  -->
  <!-- with a "#" inserted directly in front of each token.            -->
  <!--                                                                 -->
  <!-- Parameter:                                                      -->
  <!--   value : blank-separated list of identifiers; callers are      -->
  <!--           expected to have applied normalize-space() already.   -->
  <!-- Output:                                                         -->
  <!--   the tokens, each prefixed with "#", separated by one blank.   -->
  <!--   An empty 'value' produces no output at all, so an empty       -->
  <!--   attribute stays empty instead of turning into a lone "#".     -->
  <!--                                                                 -->
  <!-- This is done with a recursive algorithm that pops off the       -->
  <!-- first token, sticks a "#" in front, and then calls itself on    -->
  <!-- the remaining string.                                           -->
  <xsl:template name="tokenize-prepending-pound">
    <!-- get the string I was passed -->
    <xsl:param name="value"/>
    <xsl:choose>
      <!-- Nothing (left) to tokenize: emit nothing. string() makes    -->
      <!-- the comparison safe even if a caller passes a result tree   -->
      <!-- fragment instead of a string.                               -->
      <xsl:when test="string($value) = ''"/>
      <xsl:when test="contains($value, ' ')">
        <!-- There is at least one blank, so return a hash mark, the   -->
        <!-- portion of the parameter string before the first blank,   -->
        <!-- ...                                                       -->
        <xsl:text>#</xsl:text>
        <xsl:value-of select="substring-before($value, ' ')"/>
        <!-- ... followed by a blank (to separate it from the next bit -->
        <!-- of output) ...                                            -->
        <xsl:text> </xsl:text>
        <!-- ... followed by the result of tokenizing and prepending a -->
        <!-- hash mark to the remaining (i.e., after the 1st blank)    -->
        <!-- portion of the parameter string 'value'.                  -->
        <xsl:call-template name="tokenize-prepending-pound">
          <xsl:with-param name="value" select="substring-after($value, ' ')"/>
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <!-- There are no blanks in the (non-empty) parameter string,  -->
        <!-- so it is a single token: return it behind a hash mark.    -->
        <xsl:text>#</xsl:text>
        <xsl:value-of select="$value"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:template>

</xsl:stylesheet>

<!-- Note to self
     ==== == ====
     * Make sure file that lists items is stored on Mac w/ Mac linends
     * rmac
     * pipe < IN | pad r 24 | snake 3 | console | pad r 120 | > OUT f 120
     * will need to twiddle the pad and snake values to get nice results
     * wmac
     * trim R padding
-->