-
Notifications
You must be signed in to change notification settings - Fork 143
/
docs.js
141 lines (121 loc) · 4.73 KB
/
docs.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
'use strict'
const {google} = require('googleapis')
const cheerio = require('cheerio')
const xlsx = require('xlsx')
const cache = require('./cache')
const formatter = require('./formatter')
const log = require('./logger')
const {getAuth} = require('./auth')
const {slugify} = require('./utils')
// Resource types whose content fetchHTMLForId is able to return.
const supportedTypes = new Set([
  'document',
  'spreadsheet',
  'text/html'
])

// Resource types for which fetchOriginalRevisions asks Drive for revision data.
const revisionSupportedArr = [
  'document',
  'spreadsheet',
  'presentation'
]
const revisionSupported = new Set(revisionSupportedArr)

// The same list expressed as full mimeTypes; fetchDoc consults it before caching.
const revisionMimeSupported = new Set(
  Array.from(revisionSupportedArr, (type) => `application/vnd.google-apps.${type}`)
)
exports.fetchDoc = async (id, resourceType, req) => {
const data = await cache.get(id)
if (data && data.content) {
log.info(`CACHE HIT ${req.path}`)
return data.content
}
const auth = await getAuth()
const driveDoc = await fetch({id, resourceType, req}, auth)
const originalRevision = driveDoc[1]
const {html, byline, createdBy, sections} = formatter.getProcessedDocAttributes(driveDoc)
const payload = {html, byline, createdBy, sections}
// cache only information from document body if mimetype supports revision data
if (revisionMimeSupported.has(originalRevision.data.mimeType)) {
cache.add(id, originalRevision.data.modifiedTime, payload)
} else {
console.log(`Skipping cache add: unsupported mimetype ${originalRevision.data.mimeType}`)
}
return payload
}
/**
 * Retrieves the body of a Drive resource in the form appropriate to its type.
 *
 * @param {string} id - Drive file id.
 * @param {string} resourceType - type used to pick the retrieval strategy.
 * @param {object} req - not read by this function.
 * @param {object} drive - Drive client; `drive.files.export` is used for the
 *   default export path.
 * @returns {Promise<string|*>} a "not supported" message for types outside
 *   supportedTypes, otherwise whatever the chosen fetch/export yields.
 */
async function fetchHTMLForId(id, resourceType, req, drive) {
  if (!supportedTypes.has(resourceType)) {
    return `Library does not support viewing ${resourceType}s yet.`
  }

  switch (resourceType) {
    case 'spreadsheet':
      return fetchSpreadsheet(drive, id)
    case 'text/html':
      return fetchHTML(drive, id)
    default: {
      // text/html exports are not supported for slideshows, so those use plain text
      const exportMimeType = resourceType === 'presentation' ? 'text/plain' : 'text/html'
      const response = await drive.files.export({
        fileId: id,
        mimeType: exportMimeType
      })
      return response.data
    }
  }
}
/**
 * Looks up revision '1' of a Drive file, falling back to a placeholder object
 * when the resource type has no revision support or the request fails.
 *
 * @param {string} id - Drive file id.
 * @param {string} resourceType - checked against revisionSupported.
 * @param {object} req - not read by this function.
 * @param {object} drive - Drive client; `drive.revisions.get` is called.
 * @returns {Promise<object>} the revision response, or
 *   `{data: {lastModifyingUser: {}}}` as a stand-in.
 */
async function fetchOriginalRevisions(id, resourceType, req, drive) {
  // mock/empty revision object handed back whenever real data is unavailable
  const emptyRevision = () => ({data: {lastModifyingUser: {}}})

  if (revisionSupported.has(resourceType)) {
    const pendingRevision = drive.revisions.get({
      fileId: id,
      revisionId: '1',
      fields: '*'
    })
    // a failed lookup is logged and replaced by the placeholder rather than rethrown
    return pendingRevision.catch((err) => {
      log.warn(`Failed retrieving revision data for ${resourceType}:${id}. Error was:`, err)
      return emptyRevision()
    })
  }

  log.info(`Revision data not supported for ${resourceType}:${id}`)
  return emptyRevision()
}
/**
 * Builds a Drive v3 client and retrieves a resource's content together with its
 * original revision, running both lookups concurrently.
 *
 * @param {{id: string, resourceType: string, req: object}} params - what to fetch.
 * @param {object} authClient - passed to `google.drive` as `auth`.
 * @returns {Promise<Array>} `[content, originalRevision]`, in that order.
 */
async function fetch({id, resourceType, req}, authClient) {
  const drive = google.drive({version: 'v3', auth: authClient})

  // start both requests before waiting on either
  const pendingContent = fetchHTMLForId(id, resourceType, req, drive)
  const pendingRevision = fetchOriginalRevisions(id, resourceType, req, drive)

  const [content, originalRevision] = await Promise.all([pendingContent, pendingRevision])
  return [content, originalRevision]
}
/**
 * Exports a Google Sheet as an xlsx workbook and renders each worksheet as an
 * HTML table preceded by an <h1> that carries the sheet name.
 *
 * @param {object} drive - Drive client; only `drive.files.export` is used.
 * @param {string} id - Drive file id of the spreadsheet.
 * @returns {Promise<string>} the HTML for all sheets, joined with newlines.
 */
async function fetchSpreadsheet(drive, id) {
  // Sheet names are user-editable text, so encode them before they go into markup.
  const escapeHtml = (text) => String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')

  const {data} = await drive.files.export({
    fileId: id,
    // for mimeTypes see https://developers.google.com/drive/v3/web/manage-downloads#downloading_google_documents
    mimeType: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
  }, {
    // HTML export for sheets is limiting. Instead, download as a buffer and use
    // the xlsx library to parse the contents of the file and convert to HTML.
    responseType: 'arraybuffer'
  })

  const {SheetNames, Sheets} = xlsx.read(data, {type: 'buffer'})

  // produce the html ourselves now that we have the workbook
  const html = SheetNames.map((name) => {
    const sheet = Sheets[name]

    // get base html from xlsx, then manipulate it with cheerio
    const $ = cheerio.load(xlsx.utils.sheet_to_html(sheet))
    const table = $('table')

    // add header styles: turn the first row's <td> cells into <th>
    const firstRow = $('table tr:first-of-type')
    const firstRowHtml = firstRow.html()
    // html() yields no string when the table has no rows; leave such tables untouched
    if (typeof firstRowHtml === 'string') {
      firstRow.html(firstRowHtml.replace(/(<\/?)td(\s+|>)/ig, '$1th$2'))
    }

    // determine the last populated row from the cell addresses (e.g. "B12" -> 12)
    const max = Object.keys(sheet)
      .filter((key) => !key.startsWith('!')) // ignore special "!" keys in the sheet
      .reduce((memo, cell) => {
        const digits = /\d+/.exec(cell)
        const row = digits ? parseInt(digits[0], 10) : 0
        return row > memo ? row : memo
      }, 0)

    // remove any extra rows at the bottom of the sheet
    $(`table tr:nth-of-type(n + ${max + 1})`).remove()

    // spreadsheet names become h1 for TOC
    const slug = slugify(name)
    return [
      `<h1 id="${escapeHtml(slug)}">${escapeHtml(name)}</h1>`,
      '<table>',
      table.html(),
      '</table>'
    ].join('\n')
  }).join('\n')

  // a single HTML string covering every sheet
  return html
}
/**
 * Downloads a file's media content from Drive and returns it unmodified.
 *
 * @param {object} drive - Drive client; `drive.files.get` is called.
 * @param {string} id - Drive file id.
 * @returns {Promise<*>} the `data` field of the Drive response.
 */
async function fetchHTML(drive, id) {
  const request = {
    alt: 'media',
    fileId: id,
    supportsTeamDrives: true
  }
  const response = await drive.files.get(request)
  return response.data
}