@@ -3,6 +3,7 @@ import path from "node:path";
3
3
4
4
import express from "express" ;
5
5
import { fdir } from "fdir" ;
6
+ import { execSync , spawn } from "node:child_process" ;
6
7
7
8
import { getPopularities , Document , Translation } from "../content/index.js" ;
8
9
import {
@@ -59,8 +60,52 @@ function packageTranslationDifferences(translationDifferences) {
59
60
return { total, countByType } ;
60
61
}
61
62
63
/** HEAD commit hash of a content repository, as printed by `git rev-parse HEAD`. */
type RecentRepoHashType = string;

/**
 * A value held in the source-commit cache: either a "commits behind" count
 * for a translated folder, or the HEAD hash recorded for a content root.
 */
type SourceCommitCacheValue = number | RecentRepoHashType;

// Keyed by locale; findDocuments() adds one inner Map per locale on demand.
const _foundDocumentsCache = new Map();

/**
 * Reads the persisted source-commit cache from `cachePath`.
 *
 * A missing file yields an empty cache. A file that is not valid JSON, or
 * whose top-level value is not a plain object, is ignored with a warning
 * instead of throwing: the file is rewritten on every findDocuments() run,
 * so a half-written file must not prevent the module from loading.
 *
 * @param cachePath Path of the JSON file to read.
 * @returns A map holding every string/number entry found in the file.
 */
function loadSourceCommitCache(
  cachePath: string
): Map<string, SourceCommitCacheValue> {
  const cache = new Map<string, SourceCommitCacheValue>();
  if (!fs.existsSync(cachePath)) {
    return cache;
  }

  const raw = fs.readFileSync(cachePath, "utf8");
  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    console.warn(`Ignoring unparsable ${cachePath}: ${reason}`);
    return cache;
  }

  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    console.warn(`Ignoring ${cachePath}: expected a JSON object`);
    return cache;
  }

  for (const [key, value] of Object.entries(parsed)) {
    // Only counts and hashes are ever written to this file.
    if (typeof value === "number" || typeof value === "string") {
      cache.set(key, value);
    }
  }
  return cache;
}

// Persisted between runs; written back to disk at the end of findDocuments().
const sourceCommitCache = loadSourceCommitCache("./source-commit.json");
// Commit hash -> files changed in that commit, filled from `git log` output.
const commitFiles = new Map<string, string[]>();
// Newer end of the next `git log` range; "HEAD" until a range has been loaded.
let commitFilesOldest = "HEAD";
63
75
export async function findDocuments ( { locale } ) {
76
/**
 * Resets the in-memory caches when either content repository has moved to a
 * different HEAD commit since the caches were last stamped.
 *
 * Does nothing when `sourceCommitCache` or `prevCache` already carries the
 * current HEAD hashes of both roots. Otherwise clears `prevCache`,
 * `sourceCommitCache` and `commitFiles`, resets `commitFilesOldest` to
 * "HEAD", and stamps both caches with the current hashes.
 *
 * @param prevCache Cache to validate and, if stale, clear and re-stamp.
 */
function checkCacheValidation(prevCache: Map<any, any>): void {
  const readHeadHash = (cwd: string): string =>
    execSync("git rev-parse HEAD", { cwd }).toString().trimEnd();

  // Resolved once up front so every comparison below sees the same values.
  const contentHead = readHeadHash(CONTENT_ROOT);
  const translatedHead = readHeadHash(CONTENT_TRANSLATED_ROOT);

  const isStampedWithCurrentHeads = (cache: Map<string, any>): boolean => {
    if (!cache.has(CONTENT_ROOT) || !cache.has(CONTENT_TRANSLATED_ROOT)) {
      return false;
    }
    return (
      cache.get(CONTENT_ROOT) === contentHead &&
      cache.get(CONTENT_TRANSLATED_ROOT) === translatedHead
    );
  };

  const stampWithCurrentHeads = (cache: Map<string, any>): void => {
    cache.set(CONTENT_ROOT, contentHead);
    cache.set(CONTENT_TRANSLATED_ROOT, translatedHead);
  };

  if (
    isStampedWithCurrentHeads(sourceCommitCache) ||
    isStampedWithCurrentHeads(prevCache)
  ) {
    return;
  }

  // Either repository changed: drop everything derived from the old history.
  prevCache.clear();
  sourceCommitCache.clear();
  commitFiles.clear();
  commitFilesOldest = "HEAD";
  stampWithCurrentHeads(prevCache);
  stampWithCurrentHeads(sourceCommitCache);
}
108
+
64
109
const counts = {
65
110
// Number of documents found that aren't skipped
66
111
found : 0 ,
@@ -81,6 +126,7 @@ export async function findDocuments({ locale }) {
81
126
} ) ;
82
127
counts . total = documentsFound . count ;
83
128
129
+ checkCacheValidation ( _foundDocumentsCache ) ;
84
130
if ( ! _foundDocumentsCache . has ( locale ) ) {
85
131
_foundDocumentsCache . set ( locale , new Map ( ) ) ;
86
132
}
@@ -91,7 +137,7 @@ export async function findDocuments({ locale }) {
91
137
92
138
if ( ! cache . has ( filePath ) || cache . get ( filePath ) . mtime < mtime ) {
93
139
counts . cacheMisses ++ ;
94
- const document = getDocument ( filePath ) ;
140
+ const document = await getDocument ( filePath ) ;
95
141
cache . set ( filePath , {
96
142
document,
97
143
mtime,
@@ -114,14 +160,20 @@ export async function findDocuments({ locale }) {
114
160
took,
115
161
} ;
116
162
163
+ fs . writeFileSync (
164
+ "./source-commit.json" ,
165
+ JSON . stringify ( Object . fromEntries ( sourceCommitCache ) ) ,
166
+ "utf8"
167
+ ) ;
168
+
117
169
return {
118
170
counts,
119
171
times,
120
172
documents,
121
173
} ;
122
174
}
123
175
124
- function getDocument ( filePath ) {
176
+ async function getDocument ( filePath ) {
125
177
function packagePopularity ( document , parentDocument ) {
126
178
return {
127
179
value : document . metadata . popularity ,
@@ -141,34 +193,162 @@ function getDocument(filePath) {
141
193
} ;
142
194
}
143
195
144
- function packageEdits ( document , parentDocument ) {
145
- const commitURL = getLastCommitURL (
146
- document . fileInfo . root ,
147
- document . metadata . hash
148
- ) ;
149
- const parentCommitURL = getLastCommitURL (
150
- parentDocument . fileInfo . root ,
151
- parentDocument . metadata . hash
152
- ) ;
153
- const modified = document . metadata . modified ;
154
- const parentModified = parentDocument . metadata . modified ;
196
/**
 * Appends one line to `./source-commit-invalid-report.txt` describing a
 * translated folder whose recorded source commit could not be used.
 *
 * The write is synchronous so report lines land in call order and any I/O
 * failure is thrown to the caller rather than from a detached callback.
 * The report file is created on first use.
 *
 * @param fileFolder Folder of the translated document being reported.
 * @param commitHash The source commit hash that was found to be invalid.
 * @param message Reason text; newlines are flattened to spaces so each
 *   report entry stays on a single line.
 * @throws If the report file cannot be written.
 */
function recordInvalidSourceCommit(
  fileFolder: string,
  commitHash: string,
  message: string
): void {
  const filePath = "./source-commit-invalid-report.txt";
  const errorMessage = `- ${commitHash} commit hash is invalid in ${fileFolder}: ${message.replace(
    /\n/g,
    " "
  )}`;
  fs.appendFileSync(filePath, `${errorMessage}\n`);
}
214
+
215
/**
 * Error raised when a `git` child process fails; the message carries the
 * text the process wrote to stderr.
 */
class GitError extends Error {
  constructor(gitStderr: string) {
    super(gitStderr);
    // Lets callers and logs tell this apart from a plain Error.
    this.name = "GitError";
  }
}
221
+
222
/**
 * Loads into `commitFiles` the files changed by every commit in the range
 * `commitHash..commitFilesOldest` of the CONTENT_ROOT repository.
 *
 * `git log` is asked to print each commit as `\0\0<hash>\n<file>\0<file>\0…`,
 * which is parsed incrementally as output arrives.
 *
 * On success `commitFilesOldest` is moved to `commitHash`, so the next call
 * only loads older history. On failure it is left untouched, so one invalid
 * hash does not corrupt the range used by later calls.
 *
 * @param commitHash Exclusive older end of the range to load.
 * @returns A promise resolving to `null` once all output has been stored.
 *   It rejects with `GitError` (carrying stderr) when git does not exit
 *   with code 0, or with the spawn error when git cannot be started.
 */
function fillMemStore(commitHash: string) {
  return new Promise((resolve, reject) => {
    const git = spawn(
      "git",
      [
        "log",
        "--pretty=format:%x00%x00%H",
        "--name-only",
        "-z",
        `${commitHash}..${commitFilesOldest}`,
      ],
      {
        cwd: CONTENT_ROOT,
      }
    );

    // Stores one "<hash>\n<file>\0<file>…" record.
    const storeCommit = (record: string): void => {
      const [dirtyHash, files] = record.split("\n");
      // necessary for commits following those with no changes:
      const hash = dirtyHash.replace(/\0/g, "");
      if (!hash) {
        // Separator runs produce empty records; they carry no commit.
        return;
      }
      commitFiles.set(hash, files ? files.split("\0") : []);
    };

    // Decode as a stream so a multi-byte character split across two chunks
    // is not turned into replacement characters.
    git.stdout.setEncoding("utf8");
    git.stderr.setEncoding("utf8");

    let stdoutBuffer = "";

    git.stdout.on("data", (data: string) => {
      stdoutBuffer += data;
      const commits = stdoutBuffer.split("\0\0");
      // The last piece may be incomplete; keep it for the next chunk.
      stdoutBuffer = commits.pop() ?? "";
      commits.forEach(storeCommit);
    });

    let stderr = "";

    git.stderr.on("data", (data: string) => {
      stderr += data;
    });

    // Without this listener a failure to start git is an unhandled event.
    git.on("error", (err) => {
      reject(err);
    });

    git.on("close", (code) => {
      if (code !== 0) {
        reject(new GitError(stderr));
        return;
      }
      // The final commit is not followed by a separator, so it is still
      // sitting in the buffer when the stream ends.
      if (stdoutBuffer) {
        storeCommit(stdoutBuffer);
        stdoutBuffer = "";
      }
      commitFilesOldest = commitHash;
      resolve(null);
    });
  });
}
265
+
266
/**
 * Counts how many commits recorded in `commitFiles` touched `parentFilePath`
 * before the entry for `commitHash` is reached, and caches the count in
 * `sourceCommitCache` under `fileFolder`.
 *
 * `commitFiles` is walked in insertion order. When the entry for
 * `commitHash` is reached and does not list `parentFilePath`, the mismatch
 * is written to the invalid-source-commit report; the count is cached
 * either way.
 *
 * @param fileFolder Cache key; folder of the translated document.
 * @param parentFilePath Path of the parent file as it appears in git output.
 * @param commitHash Source commit recorded for the translation.
 * @returns The cached or newly computed count.
 *   NOTE(review): when git fails with a `GitError`, nothing is cached and
 *   the value returned is `undefined` despite the declared type.
 * @throws Any non-`GitError` failure from loading history or reporting.
 */
async function getCommitBehindFromLatest(
  fileFolder: string,
  parentFilePath: string,
  commitHash: string
): Promise<number> {
  if (!sourceCommitCache.has(fileFolder)) {
    try {
      // Load history back to this commit if it has not been seen yet.
      if (!commitFiles.has(commitHash)) {
        await fillMemStore(commitHash);
      }

      let commitsTouchingParent = 0;
      for (const [hash, changedFiles] of commitFiles) {
        const touchesParent = changedFiles.includes(parentFilePath);
        if (hash === commitHash) {
          if (!touchesParent) {
            recordInvalidSourceCommit(
              fileFolder,
              commitHash,
              "file isn't changed in this commit"
            );
          }
          break;
        }
        if (touchesParent) {
          commitsTouchingParent += 1;
        }
      }
      sourceCommitCache.set(fileFolder, commitsTouchingParent);
    } catch (error) {
      if (!(error instanceof GitError)) {
        throw error;
      }
      recordInvalidSourceCommit(fileFolder, commitHash, error.message);
    }
  }

  return sourceCommitCache.get(fileFolder) as number;
}
304
+
305
/**
 * Collects edit/commit information for a translated document and its
 * parent document.
 *
 * @param document The translated document; reads `fileInfo.root`,
 *   `fileInfo.folder`, `metadata.hash`, `metadata.modified` and
 *   `metadata.l10n`.
 * @param parentDocument The parent document; reads `fileInfo.root`,
 *   `fileInfo.path`, `metadata.hash` and `metadata.modified`.
 * @returns Commit URLs and modification values for both documents. When
 *   the translation declares `l10n.sourceCommit`, also the URL of that
 *   commit in CONTENT_ROOT and the count from `getCommitBehindFromLatest`;
 *   otherwise those two fields are `undefined`.
 */
async function packageEdits(document, parentDocument) {
  const {
    fileInfo: { root: fileRoot, folder: fileFolder },
    metadata: { hash: fileHash, modified, l10n },
  } = document;
  const {
    fileInfo: { root: parentFileRoot, path: parentFilePath },
    // The metadata property is `modified`; it is only *renamed* locally.
    // Destructuring a `parentModified` property would always be undefined.
    metadata: { hash: parentFileHash, modified: parentModified },
  } = parentDocument;

  const commitURL = getLastCommitURL(fileRoot, fileHash);
  const parentCommitURL = getLastCommitURL(parentFileRoot, parentFileHash);
  let sourceCommitURL;
  let sourceCommitsBehindCount;

  if (l10n?.sourceCommit) {
    sourceCommitURL = getLastCommitURL(CONTENT_ROOT, l10n.sourceCommit);
    sourceCommitsBehindCount = await getCommitBehindFromLatest(
      fileFolder,
      // Swap the root prefix for "files" before matching git's file list.
      parentFilePath.replace(parentFileRoot, "files"),
      l10n.sourceCommit
    );
  }

  return {
    commitURL,
    parentCommitURL,
    modified,
    parentModified,
    sourceCommitURL,
    sourceCommitsBehindCount,
  };
}
162
338
163
339
// We can't just open the `index.json` and return it like that in the XHR
164
340
// payload. It's too much stuff and some values need to be repackaged/
165
341
// serialized or some other transformation computation.
166
// We can't just open the `index.json` and return it like that in the XHR
// payload. It's too much stuff and some values need to be repackaged/
// serialized or some other transformation computation.
/**
 * Builds the trimmed-down payload for one translated document.
 *
 * @param document The translated document.
 * @param englishDocument The parent document it is compared against.
 * @param translationDifferences Differences to summarise for the payload.
 * @returns An object with `popularity`, `differences`, `edits`, `mdn_url`
 *   and `title`.
 */
async function packageDocument(
  document,
  englishDocument,
  translationDifferences
) {
  const {
    url: mdn_url,
    metadata: { title },
  } = document;

  return {
    popularity: packagePopularity(document, englishDocument),
    differences: packageTranslationDifferences(translationDifferences),
    // Edits need git history, so this is the one asynchronous part.
    edits: await packageEdits(document, englishDocument),
    mdn_url,
    title,
  };
}
174
354
@@ -191,7 +371,7 @@ function getDocument(filePath) {
191
371
) ) {
192
372
differences . push ( difference ) ;
193
373
}
194
- return packageDocument ( document , englishDocument , differences ) ;
374
+ return await packageDocument ( document , englishDocument , differences ) ;
195
375
}
196
376
197
377
const _defaultLocaleDocumentsCache = new Map ( ) ;
0 commit comments