Difference between revisions of "Cypher"

From TEIWiki
Jump to navigation Jump to search
(Created page with " // Export TEI-Graph to Standoff-Property-JSON-Format match path=(d:XmlDocument)-[:NE*]->(e:XmlCharacters) where not (e)-[:NE]->() with tail(nodes(path)) as words, d with reduce(...")
 
 
(5 intermediate revisions by the same user not shown)
Line 1: Line 1:
 
+
<code>
// Export TEI-Graph to Standoff-Property-JSON-Format
+
// Export TEI-Graph to Standoff-Property-JSON-Format by Stefan Armbruster
match path=(d:XmlDocument)-[:NE*]->(e:XmlCharacters)
+
match path=(d:XmlDocument)-[:NE*]->(e:XmlCharacters)
where not (e)-[:NE]->()
+
where not (e)-[:NE]->()
with tail(nodes(path)) as words, d
+
with tail(nodes(path)) as words, d
with reduce(s="", x in words| s + x.text ) as allText, d
+
with reduce(s="", x in words| s + x.text ) as allText, d
call apoc.path.expandConfig(d,{
+
call apoc.path.expandConfig(d,{
  relationshipFilter: '<IS_CHILD_OF',
+
relationshipFilter: '<IS_CHILD_OF',
  labelFilter: 'XmlTag',
+
labelFilter: 'XmlTag',
  bfs: false,
+
bfs: false,
  minLevel: 1
+
minLevel: 1
}) yield path
+
}) yield path
with allText, path, nodes(path)[-1] as this
+
with allText, path, nodes(path)[-1] as this
MATCH p=(this)-[:NEXT*]->(x)
+
MATCH p=(this)-[:NEXT*]->(x)
where (x)-[:LAST_CHILD_OF*]->(this) and any(x in nodes(p) WHERE x:XmlCharacters)
+
where (x)-[:LAST_CHILD_OF*]->(this) and any(x in nodes(p) WHERE x:XmlCharacters)
with allText, this, collect(p)[-1] as longest
+
with allText, this, collect(p)[-1] as longest
with allText, this, [x in nodes(longest) where x:XmlCharacters] as xmlCharacters
+
with allText, this, [x in nodes(longest) where x:XmlCharacters] as xmlCharacters
with allText, this,  
+
with allText, this,  
  apoc.coll.min([x in xmlCharacters | x.startIndex]) as min,  
+
apoc.coll.min([x in xmlCharacters | x.startIndex]) as min,  
  apoc.coll.max([x in xmlCharacters | x.endIndex]) as max,  
+
apoc.coll.max([x in xmlCharacters | x.endIndex]) as max,  
  apoc.text.join([x in xmlCharacters | x.text], "") as text
+
apoc.text.join([x in xmlCharacters | x.text], "") as text
with allText, {
+
with allText, {
  index:id(this),  
+
index:id(this),  
  startIndex: min,  
+
startIndex: min,  
  endIndex: max,
+
endIndex: max,
  text: text,
+
text: text,
  type: this._name,
+
type: this._name,
  attributes: apoc.map.fromPairs([x in keys(this) WHERE not x starts with "_" | [x, this[x]] ])
+
attributes: apoc.map.fromPairs([x in keys(this) WHERE not x starts with "_" | [x, this[x]] ])
} as standoffProperty
+
} as standoffProperty
return {text: allText, properties: collect(standoffProperty)};
+
return {text: allText, properties: collect(standoffProperty)};
 +
</code>

Latest revision as of 13:55, 3 March 2022

// Export TEI-Graph to Standoff-Property-JSON-Format by Stefan Armbruster
//
// Result: one map of the form {text: <full text>, properties: [<standoff property>, ...]}
// per distinct document text.
// Requires the APOC library (apoc.path.expandConfig, apoc.coll.min, apoc.coll.max,
// apoc.text.join, apoc.map.fromPairs).
// NOTE(review): the labels (XmlDocument, XmlTag, XmlCharacters), relationship types
// (NE, NEXT, IS_CHILD_OF, LAST_CHILD_OF) and the startIndex/endIndex/_name properties
// look like the graph produced by APOC's XML import -- confirm against the import config.

// 1. Follow the :NE chain from the document to the XmlCharacters node that has no
//    outgoing :NE, then concatenate the text of every node after the document node.
MATCH path = (d:XmlDocument)-[:NE*]->(e:XmlCharacters)
WHERE NOT (e)-[:NE]->()
WITH tail(nodes(path)) AS words, d
WITH reduce(s = "", x IN words | s + x.text) AS allText, d

// 2. Depth-first expansion from the document along incoming :IS_CHILD_OF relationships,
//    restricted to XmlTag nodes. minLevel: 1 excludes the zero-length path, i.e. the
//    document node itself. The end node of each yielded path is the tag to export.
CALL apoc.path.expandConfig(d, {
  relationshipFilter: '<IS_CHILD_OF',
  labelFilter: 'XmlTag',
  bfs: false,
  minLevel: 1
}) YIELD path
WITH allText, nodes(path)[-1] AS this

// 3. From each tag, walk the :NEXT chain to nodes that lead back to the tag via
//    :LAST_CHILD_OF, keeping only paths that contain at least one XmlCharacters node.
//    Tags for which no such path exists produce no row and are therefore not exported.
MATCH p = (this)-[:NEXT*]->(x)
WHERE (x)-[:LAST_CHILD_OF*]->(this)
  AND any(n IN nodes(p) WHERE n:XmlCharacters)

// Sort explicitly before aggregating: without ORDER BY the order of the collected list
// is whatever order the rows happen to arrive in, so taking the last element would not
// reliably pick the longest path.
WITH allText, this, p
ORDER BY length(p)
WITH allText, this, collect(p)[-1] AS longest

// 4. The text nodes on the longest path give the tag's character range and its text.
WITH allText, this, [n IN nodes(longest) WHERE n:XmlCharacters] AS xmlCharacters
WITH allText, this,
  apoc.coll.min([c IN xmlCharacters | c.startIndex]) AS min,
  apoc.coll.max([c IN xmlCharacters | c.endIndex]) AS max,
  apoc.text.join([c IN xmlCharacters | c.text], "") AS text

// 5. Build one standoff property per tag. Node properties whose key starts with "_"
//    are left out of `attributes`; `_name` is exposed separately as `type`.
// NOTE(review): id() is deprecated in recent Neo4j versions in favour of elementId(),
// which returns a string instead of an integer -- kept as-is to preserve the output.
WITH allText, {
  index: id(this),
  startIndex: min,
  endIndex: max,
  text: text,
  type: this._name,
  attributes: apoc.map.fromPairs([k IN keys(this) WHERE NOT k STARTS WITH "_" | [k, this[k]]])
} AS standoffProperty
RETURN {text: allText, properties: collect(standoffProperty)};