<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://wiki.tei-c.org/index.php?action=history&amp;feed=atom&amp;title=List_words.xslt</id>
	<title>List words.xslt - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://wiki.tei-c.org/index.php?action=history&amp;feed=atom&amp;title=List_words.xslt"/>
	<link rel="alternate" type="text/html" href="https://wiki.tei-c.org/index.php?title=List_words.xslt&amp;action=history"/>
	<updated>2026-04-21T17:43:14Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.32.0</generator>
	<entry>
		<id>https://wiki.tei-c.org/index.php?title=List_words.xslt&amp;diff=10972&amp;oldid=prev</id>
		<title>Syd: escape ampersand of entity reference</title>
		<link rel="alternate" type="text/html" href="https://wiki.tei-c.org/index.php?title=List_words.xslt&amp;diff=10972&amp;oldid=prev"/>
		<updated>2012-07-18T04:32:42Z</updated>

		<summary type="html">&lt;p&gt;escape ampersand of entity reference&lt;/p&gt;
&lt;table class=&quot;diff diff-contentalign-left&quot; data-mw=&quot;interface&quot;&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;col class=&quot;diff-marker&quot; /&gt;
				&lt;col class=&quot;diff-content&quot; /&gt;
				&lt;tr class=&quot;diff-title&quot; lang=&quot;en&quot;&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;← Older revision&lt;/td&gt;
				&lt;td colspan=&quot;2&quot; style=&quot;background-color: #fff; color: #222; text-align: center;&quot;&gt;Revision as of 04:32, 18 July 2012&lt;/td&gt;
				&lt;/tr&gt;&lt;tr&gt;&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot; id=&quot;mw-diff-left-l49&quot; &gt;Line 49:&lt;/td&gt;
&lt;td colspan=&quot;2&quot; class=&quot;diff-lineno&quot;&gt;Line 49:&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &amp;lt;xsl:template match=&amp;quot;text()&amp;quot;&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;   &amp;lt;xsl:template match=&amp;quot;text()&amp;quot;&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;     &amp;lt;xsl:variable name=&amp;quot;no-punctuation&amp;quot;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;     &amp;lt;xsl:variable name=&amp;quot;no-punctuation&amp;quot;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt;−&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #ffe49c; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;       select=&amp;quot;translate( ., '“”,.;:?!()-–—…&amp;amp;quot;',&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt;+&lt;/td&gt;&lt;td style=&quot;color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #a3d3ff; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;       select=&amp;quot;translate( ., '“”,.;:?!()-–—…&amp;amp;&lt;ins class=&quot;diffchange diffchange-inline&quot;&gt;amp;&lt;/ins&gt;quot;',&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;                             '               ')&amp;quot;/&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;                             '               ')&amp;quot;/&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;tr&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;     &amp;lt;xsl:choose&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;td class='diff-marker'&gt; &lt;/td&gt;&lt;td style=&quot;background-color: #f8f9fa; color: #222; font-size: 88%; border-style: solid; border-width: 1px 1px 1px 4px; border-radius: 0.33em; border-color: #eaecf0; vertical-align: top; white-space: pre-wrap;&quot;&gt;&lt;div&gt;     &amp;lt;xsl:choose&amp;gt;&lt;/div&gt;&lt;/td&gt;&lt;/tr&gt;
&lt;/table&gt;</summary>
		<author><name>Syd</name></author>
		
	</entry>
	<entry>
		<id>https://wiki.tei-c.org/index.php?title=List_words.xslt&amp;diff=10971&amp;oldid=prev</id>
		<title>Syd: Created page with &quot;This is a relatively simple example of a stylesheet that will generate a list of words, ostensibly for text analysis. It is '''not''' intended to be used unmodified. The values o...&quot;</title>
		<link rel="alternate" type="text/html" href="https://wiki.tei-c.org/index.php?title=List_words.xslt&amp;diff=10971&amp;oldid=prev"/>
		<updated>2012-07-18T04:31:32Z</updated>

		<summary type="html">&lt;p&gt;Created page with &amp;quot;This is a relatively simple example of a stylesheet that will generate a list of words, ostensibly for text analysis. It is &amp;#039;&amp;#039;&amp;#039;not&amp;#039;&amp;#039;&amp;#039; intended to be used unmodified. The values o...&amp;quot;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;This is a relatively simple example of a stylesheet that will generate a list of words, ostensibly for text analysis. It is '''not''' intended to be used unmodified. The values of &amp;lt;tt&amp;gt;type=&amp;lt;/tt&amp;gt; attributes of &amp;lt;tt&amp;gt;&amp;amp;lt;div&amp;gt;&amp;lt;/tt&amp;gt;, for example, should be changed to match your project's.&lt;br /&gt;
Note that this stylesheet does '''not''' handle end-of-line hyphenated words properly. You should run your document through something like [[Unshy.xslt]] first.&lt;br /&gt;
&amp;lt;pre&amp;gt;&amp;lt;nowiki&amp;gt;&lt;br /&gt;
&amp;lt;xsl:stylesheet version=&amp;quot;1.0&amp;quot;&lt;br /&gt;
  xmlns:xsl=&amp;quot;http://www.w3.org/1999/XSL/Transform&amp;quot;&lt;br /&gt;
  xmlns:in=&amp;quot;http://www.tei-c.org/ns/1.0&amp;quot;&amp;gt;&lt;br /&gt;
&lt;br /&gt;
  &amp;lt;!--&lt;br /&gt;
  Read in a TEI P5 file, and spit out a text list of words that&lt;br /&gt;
  subsequent analysis may find useful.&lt;br /&gt;
  Written 2012-07-18 by Syd Bauman, based entirely on WWP's dev/only_words.xslt&lt;br /&gt;
  Copyleft 2010 by Syd Bauman and the Brown WWP&lt;br /&gt;
  --&amp;gt;&lt;br /&gt;
&lt;br /&gt;
  &amp;lt;!-- First, tell the XSLT engine that we’d like to spit out plain text (instead of --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- the default, which is XML) --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:output method=&amp;quot;text&amp;quot;/&amp;gt;&lt;br /&gt;
&lt;br /&gt;
  &amp;lt;!-- Ascertain whether the user wants case-folding or not. Default is yes --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:param name=&amp;quot;case-fold&amp;quot; select=&amp;quot;true()&amp;quot;/&amp;gt;&lt;br /&gt;
  &lt;br /&gt;
  &amp;lt;!-- Match the document root (which itself contains at least the root element, and probably --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- some comments and processing instructions, too) --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:template match=&amp;quot;/&amp;quot;&amp;gt;&lt;br /&gt;
    &amp;lt;!-- In our first pass through the document tree, we don't spit the output out, but --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- rather save it in a variable for further processing. --&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:variable name=&amp;quot;documentData&amp;quot;&amp;gt;&lt;br /&gt;
      &amp;lt;!-- process the TEI document or documents, whether we're reading a corpus or not --&amp;gt;&lt;br /&gt;
      &amp;lt;xsl:apply-templates select=&amp;quot;/in:TEI | /in:teiCorpus/in:TEI&amp;quot;/&amp;gt;&lt;br /&gt;
    &amp;lt;/xsl:variable&amp;gt;&lt;br /&gt;
    &amp;lt;!-- Now we have all of the “words” in the variable, but we also have lots of --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- whitespace, some of it quite weird. What we’d like is just a list, so --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- convert it. First, reduce all sequences of one or more whitespace characters --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- to a single blank; then turn all blanks to newlines. --&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:value-of select=&amp;quot;translate(normalize-space($documentData),'&amp;amp;#x20;','&amp;amp;#x0A;')&amp;quot;/&amp;gt;&lt;br /&gt;
  &amp;lt;/xsl:template&amp;gt;&lt;br /&gt;
&lt;br /&gt;
  &amp;lt;!-- For any element that isn’t matched in a specific template, below, --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- just process all of its element and textual children (thus ignoring --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- any comments or processing instructions). Note that this template --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- catches the in:TEI from the root template, above. --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:template match=&amp;quot;*&amp;quot;&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:apply-templates select=&amp;quot;*|text()&amp;quot;/&amp;gt;&lt;br /&gt;
  &amp;lt;/xsl:template&amp;gt;&lt;br /&gt;
  &lt;br /&gt;
  &amp;lt;!-- For any text nodes we process, convert punctuation to a space and --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- spit out the result. (Remember that what we spit out is going to --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- the variable $documentData, not directly to output.) --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:template match=&amp;quot;text()&amp;quot;&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:variable name=&amp;quot;no-punctuation&amp;quot;&lt;br /&gt;
      select=&amp;quot;translate( ., '“”,.;:?!()-–—…&amp;amp;quot;',&lt;br /&gt;
                            '               ')&amp;quot;/&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:choose&amp;gt;&lt;br /&gt;
      &amp;lt;xsl:when test=&amp;quot;$case-fold&amp;quot;&amp;gt;&lt;br /&gt;
        &amp;lt;xsl:value-of select=&amp;quot;translate( $no-punctuation,&lt;br /&gt;
          'ABCDEFGHIJKLMNOPQRSTUVWXYZ',&lt;br /&gt;
          'abcdefghijklmnopqrstuvwxyz')&amp;quot;/&amp;gt;&lt;br /&gt;
        &amp;lt;!-- Note: if you are using alphabets other than the standard --&amp;gt;&lt;br /&gt;
        &amp;lt;!-- English 26, either add letters above or convert stylesheet --&amp;gt;&lt;br /&gt;
        &amp;lt;!-- to XSLT 2.0 (or 3.0) and use the lower-case() function --&amp;gt;&lt;br /&gt;
      &amp;lt;/xsl:when&amp;gt;&lt;br /&gt;
      &amp;lt;xsl:otherwise&amp;gt;&lt;br /&gt;
        &amp;lt;xsl:value-of select=&amp;quot;$no-punctuation&amp;quot;/&amp;gt;&lt;br /&gt;
      &amp;lt;/xsl:otherwise&amp;gt;&lt;br /&gt;
    &amp;lt;/xsl:choose&amp;gt;&lt;br /&gt;
  &amp;lt;/xsl:template&amp;gt;&lt;br /&gt;
  &lt;br /&gt;
  &amp;lt;!-- Some constructs are summarily ignored: the entire element, including --&amp;gt;&lt;br /&gt;
  &amp;lt;!-- any text content or sub-elements, is just thrown away. --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:template match=&amp;quot;&lt;br /&gt;
      in:mw | in:fw&lt;br /&gt;
    | in:teiHeader&lt;br /&gt;
    | in:back//in:div[@type='editorial']&lt;br /&gt;
    | in:div[@type='advert']&lt;br /&gt;
    | in:div[@type='castlist']&lt;br /&gt;
    | in:div[@type='colophon']&lt;br /&gt;
    | in:div[@type='contents']&lt;br /&gt;
    | in:div[@type='corrigenda']&lt;br /&gt;
    | in:div[@type='index']&lt;br /&gt;
    | in:abbr[parent::in:choice/in:expan]&lt;br /&gt;
    | in:sic[parent::in:choice/in:corr]&lt;br /&gt;
    | in:orig[parent::in:choice/in:reg]&lt;br /&gt;
    | in:am[parent::in:choice/in:ex]&lt;br /&gt;
    | in:unclear[parent::in:choice and preceding-sibling::in:unclear]&lt;br /&gt;
    | in:add&lt;br /&gt;
    | in:del&lt;br /&gt;
    | in:note[not( @type='authorial' or @resp='author' )]&lt;br /&gt;
    | in:figure&lt;br /&gt;
    | in:stage&lt;br /&gt;
    | in:docImprint&lt;br /&gt;
    | in:bibl&lt;br /&gt;
    | in:dateline&lt;br /&gt;
    | in:respLine&lt;br /&gt;
    | comment()&lt;br /&gt;
    | processing-instruction()&lt;br /&gt;
    | @*&lt;br /&gt;
    &amp;quot;&amp;gt;&lt;br /&gt;
    &amp;lt;!-- It's not really necessary to catch comments, processing instructions, and --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- attributes with this null template, as they will never be processed anyway, --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- because we only process elements and text nodes. But it seems like a good --&amp;gt;&lt;br /&gt;
    &amp;lt;!-- idea to be explicit: nuke 'em. --&amp;gt;&lt;br /&gt;
  &amp;lt;/xsl:template&amp;gt;&lt;br /&gt;
  &lt;br /&gt;
  &amp;lt;!-- Some elements “cause” a word-break, so insert a blank for them --&amp;gt;&lt;br /&gt;
  &amp;lt;xsl:template match=&amp;quot;&lt;br /&gt;
      in:cb&lt;br /&gt;
    | in:lb&lt;br /&gt;
    | in:pb&lt;br /&gt;
    | in:milestone&lt;br /&gt;
    | in:div&lt;br /&gt;
    | in:p&lt;br /&gt;
    &amp;quot;&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:text&amp;gt;&amp;amp;#x20;&amp;lt;/xsl:text&amp;gt;&lt;br /&gt;
    &amp;lt;xsl:apply-templates select=&amp;quot;*|text()&amp;quot;/&amp;gt;&lt;br /&gt;
  &amp;lt;/xsl:template&amp;gt;&lt;br /&gt;
    &lt;br /&gt;
&amp;lt;/xsl:stylesheet&amp;gt;&lt;br /&gt;
&amp;lt;/nowiki&amp;gt;&amp;lt;/pre&amp;gt;&lt;br /&gt;
&lt;br /&gt;
[[Category:XSLT]][[Category:XSLT:1.0]]&lt;/div&gt;</summary>
		<author><name>Syd</name></author>
		
	</entry>
</feed>