Skip to content

Commit

Permalink
TEI: also write pos in PC
Browse files Browse the repository at this point in the history
  • Loading branch information
PrinsINT committed Oct 17, 2024
1 parent d65a8c4 commit 3ad1ace
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ class LayerToTEIConverter(
val alphaNumeric = Regex("""[a-zA-Z0-9]""")
if (!term.literals.contains(alphaNumeric)) {
// Interpret as punctuation only if it doesn't contain any alphanumeric characters
writer.writeRaw("<pc xml:id=\"${term.targets[0].id}\">${getLiteral()}</pc>")
val pos = term.posOrEmpty.escapeXML()
writer.writeRaw("<pc pos=\"$pos\" xml:id=\"${term.targets[0].id}\">${getLiteral()}</pc>")
} else {
// Clear the pos and interpret as <w>
val lemma = term.lemmaOrEmpty.escapeXML()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@ import org.ivdnt.galahad.data.layer.WordForm
import org.ivdnt.galahad.evaluation.comparison.LayerComparison.Companion.truncatedPcMatch
import org.ivdnt.galahad.port.folia.export.deepcopy
import org.ivdnt.galahad.port.xml.getPlainTextContent
import org.ivdnt.galahad.taggers.TaggerStore
import org.ivdnt.galahad.tagset.TagsetStore
import org.ivdnt.galahad.util.*
import org.w3c.dom.Document
import org.w3c.dom.Element
Expand Down Expand Up @@ -235,12 +233,12 @@ open class TEITextMerger(
val termToAdd = layer.termForWordForm(wf)

val wTag = if (layer.tagset.punctuationTags.contains(termToAdd.pos) && !termToAdd.literals.contains(alphaNumeric)) {
val n = document.createElement("pc")
n
val element = document.createElement("pc")
element
} else {
val n = document.createElement("w")
n.setAttribute("lemma", termToAdd.lemmaOrEmpty)
n
val element = document.createElement("w")
element.setAttribute("lemma", termToAdd.lemmaOrEmpty)
element
}

// Empty pos if it is a PC and it contains alphanumeric characters (so it can't be PC anyway).
Expand Down

0 comments on commit 3ad1ace

Please sign in to comment.