-
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse-text-cite.ts
executable file
·116 lines (97 loc) · 2.69 KB
/
parse-text-cite.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import nearley from 'nearley'
import grammar from './apa.js'
import { Data as CSL } from 'csl-json'
/**
 * A citation entry as it appears in the arrays returned by `parseTextCite`,
 * holding a list of cited items plus citation-level properties.
 */
export interface Citation {
// Identifier of this citation. NOTE(review): presumably assigned by the grammar in './apa.js' — confirm format/uniqueness there.
citationId: string
// The individual items referenced by this citation.
citationItems: CitationItem[]
// Citation-level properties (see `Properties`).
properties: Properties
// Text recovered from the parsed input for this citation; assigned by
// `recoverOriginalCitation`, and may be left undefined when no text segment
// could be matched to this citation.
originalText?: string
}
/**
 * Citation-level properties attached to a `Citation`.
 * NOTE(review): field names look like CSL citation properties — confirm against the grammar that produces them.
 */
export interface Properties {
// Numeric note index. NOTE(review): meaning is not visible in this file — confirm.
noteIndex: number
// Optional mode string. NOTE(review): the set of valid values is not visible in this file — confirm.
mode?: string
}
/**
 * A single item referenced inside a `Citation`.
 * NOTE(review): the optional fields below are produced outside this file
 * (presumably by the grammar in './apa.js'); their exact contents should be confirmed there.
 */
export interface CitationItem {
// Bibliographic data for the item, typed as `Data` from 'csl-json'.
itemData: CSL
// Identifier of the item.
id: string
// Optional text associated with the item, presumably preceding it — confirm.
prefix?: string
// Optional text associated with the item, presumably following it — confirm.
suffix?: string
// Optional infix text. NOTE(review): semantics not visible in this file.
infix?: string
// Optional locator label. NOTE(review): semantics not visible in this file.
label?: string
// Optional locator value. NOTE(review): semantics not visible in this file.
locator?: string
}
/**
 * Options accepted by `parseTextCite`.
 */
interface Options {
// When truthy, `parseTextCite` returns every parse result instead of
// selecting the one with the smallest total length of plain strings.
showAll?: boolean
// Parse failures are reported with `console.warn` unless this is explicitly `false`.
log?: boolean
}
// Module-level nearley parser built from the compiled grammar imported from './apa.js'.
// Note: `parseTextCite` declares its own local `parser` for each call and does not use this instance.
export const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar))
/**
 * Sums the lengths of the plain-string entries of one parse result.
 * Non-string entries contribute nothing to the total.
 */
const totalStringLength = (result: unknown[]): number =>
  result.reduce<number>(
    (total, entry) => (typeof entry === 'string' ? total + entry.length : total),
    0
  )

/**
 * Parses `string` with a fresh parser built from the compiled grammar and
 * returns the parse as a list of plain strings and citations.
 *
 * @param string - The text to parse.
 * @param options - `showAll` returns every parse result; `log: false`
 *   silences the warnings emitted when feeding the parser throws.
 * @returns
 *   - `[string]` when the parser exposes no results (presumably after a
 *     failed feed) or when the result list is empty;
 *   - every parse result when `options.showAll` is truthy;
 *   - otherwise the parse result whose plain-string entries have the smallest
 *     total length (the first one wins ties), with each citation's
 *     `originalText` filled in by `recoverOriginalCitation`.
 */
export const parseTextCite = (string: string, options?: Options) => {
  // A new parser per call, so no state is shared between invocations.
  const parser = new nearley.Parser(nearley.Grammar.fromCompiled(grammar))
  try {
    parser.feed(string)
  } catch (err) {
    // Parse failures are not fatal: report them (unless silenced) and fall
    // through to the "no results" handling below.
    if (options?.log !== false) {
      console.warn(string)
      console.warn(err)
    }
  }

  const results = parser.results as unknown[][]
  if (!results) {
    return [string]
  }
  if (options?.showAll) {
    return results as (Citation | string)[][]
  }

  // find the item that has the smallest total length of strings
  let narrowResults: unknown[] | undefined
  let narrowLength = Infinity
  for (const candidate of results) {
    const candidateLength = totalStringLength(candidate)
    // Strict comparison keeps the earliest result when lengths are equal.
    if (candidateLength < narrowLength) {
      narrowResults = candidate
      narrowLength = candidateLength
    }
  }
  if (!narrowResults) {
    return [string]
  }

  // The parse result does not carry the literal citation text, so it is
  // recovered from the input afterwards.
  return recoverOriginalCitation(narrowResults as (Citation | string)[], string)
}
/**
 * Fills in `originalText` on every citation of a parse result by recovering
 * the citation text from the original input.
 *
 * The plain-string entries of `cite` are removed from `ogText` (first
 * occurrence of each, in order); what is left is treated as the concatenated
 * citation text and cut into segments at each `)`. Segments are assigned to
 * the citations in order of appearance.
 *
 * @param cite - Parse result: plain strings interleaved with citations.
 *   The citation objects are mutated in place.
 * @param ogText - The text that was parsed to produce `cite`.
 * @returns A new array with the same entries as `cite`, in the same order.
 *   A citation for which no segment is left gets `originalText: undefined`.
 */
function recoverOriginalCitation(cite: (string | Citation)[], ogText: string) {
  // Strip the non-citation text so only the citations' own text remains.
  let citationText = ogText
  for (const piece of cite) {
    if (typeof piece === 'string') {
      citationText = citationText.replace(piece, '')
    }
  }

  // `split(')')` consumes the closing parenthesis of every segment except the
  // final one (which is whatever follows the last `)`), so put it back exactly
  // where it was removed. Empty segments are dropped.
  const segments = citationText.split(')')
  const lastIndex = segments.length - 1
  const originalCites: string[] = []
  segments.forEach((segment, idx) => {
    if (!segment) {
      return
    }
    originalCites.push(idx < lastIndex ? `${segment})` : segment)
  })

  // Hand out the recovered segments to the citations in order.
  let citeIndex = 0
  return cite.map((piece) => {
    if (typeof piece !== 'string') {
      piece.originalText = originalCites[citeIndex]
      citeIndex++
    }
    return piece
  })
}