Skip to content

Commit 7694e21

Browse files
committed
feat: add git_resolve function
This resolves a git ref into a json object, which can be passed to update or insert operations to restore previous versions of rows.
1 parent 998f123 commit 7694e21

File tree

8 files changed

+267
-26
lines changed

8 files changed

+267
-26
lines changed

package.json

+10-8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
"name": "plv8-git",
33
"version": "0.0.1",
44
"description": "Tracks history of rows in postgresql database tables, using in-memory git operations",
5+
"repository": {
6+
"type": "git",
7+
"url": "https://github.com/mmkal/plv8-git.git"
8+
},
9+
"homepage": "https://github.com/mmkal/plv8-git#readme",
10+
"bugs": {
11+
"url": "https://github.com/mmkal/plv8-git/issues"
12+
},
513
"keywords": [
614
"postgresql",
715
"git",
@@ -13,14 +21,6 @@
1321
"memfs",
1422
"event-sourcing"
1523
],
16-
"homepage": "https://github.com/mmkal/plv8-git#readme",
17-
"bugs": {
18-
"url": "https://github.com/mmkal/plv8-git/issues"
19-
},
20-
"repository": {
21-
"type": "git",
22-
"url": "https://github.com/mmkal/plv8-git.git"
23-
},
2424
"license": "Apache-2.0",
2525
"main": "dist/bundle.js",
2626
"types": "dist/index.d.ts",
@@ -32,6 +32,8 @@
3232
"compile": "tsc -p tsconfig.lib.json",
3333
"predocker-apply": "yarn docker-copy-query",
3434
"docker-apply": "yarn psql -f /queries/create-git-functions.sql",
35+
"docker-bash": "docker-compose exec postgres bash",
36+
"docker-psql": "docker-compose exec postgres psql -h localhost -U postgres postgres",
3537
"predocker-copy-query": "yarn docker-exec mkdir -p /queries",
3638
"docker-copy-query": "docker cp queries/create-git-functions.sql plv8-git_postgres_1:/queries",
3739
"docker-exec": "docker exec plv8-git_postgres_1",

readme.md

+105-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ The implementation uses [plv8](https://github.com/plv8/plv8) to run JavaScript i
1010
- [Tracking history](#tracking-history)
1111
- [Deletions](#deletions)
1212
- [Configuraton](#configuraton)
13+
- [Restoring previous versions](#restoring-previous-versions)
1314
- [Caveat](#caveat)
1415
- [Implementation](#implementation)
1516
<!-- codegen:end -->
@@ -18,6 +19,7 @@ The implementation uses [plv8](https://github.com/plv8/plv8) to run JavaScript i
1819

1920
To paraphrase [@mayfer's twitter thread](https://twitter.com/mayfer/status/1308606131426582528):
2021

22+
2123
- never have to worry about building edit/delete/undo/backup/recover type features, one generic git-backed [column] is enough
2224

2325
- removes the need to keep additional SQL tables which keep logs of all edit histories.
@@ -99,6 +101,7 @@ This query will return:
99101
"message": "test_table_git_track_trigger: BEFORE UPDATE ROW on public.test_table",
100102
"author": "pguser ([email protected])",
101103
"timestamp": "2020-10-23T12:00:00.000Z",
104+
"oid": "[oid]",
102105
"changes": [
103106
{
104107
"field": "text",
@@ -111,6 +114,7 @@ This query will return:
111114
"message": "test_table_git_track_trigger: BEFORE INSERT ROW on public.test_table",
112115
"author": "pguser ([email protected])",
113116
"timestamp": "2020-10-23T12:00:00.000Z",
117+
"oid": "[oid]",
114118
"changes": [
115119
{
116120
"field": "id",
@@ -253,6 +257,7 @@ where identifier->>'id' = '1'
253257
"message": "test_table_git_track_trigger: BEFORE UPDATE ROW on public.test_table",
254258
"author": "pguser ([email protected])",
255259
"timestamp": "2020-10-23T12:00:00.000Z",
260+
"oid": "[oid]",
256261
"changes": [
257262
{
258263
"field": "text",
@@ -265,6 +270,7 @@ where identifier->>'id' = '1'
265270
"message": "test_table_git_track_trigger: BEFORE INSERT ROW on public.test_table",
266271
"author": "pguser ([email protected])",
267272
"timestamp": "2020-10-23T12:00:00.000Z",
273+
"oid": "[oid]",
268274
"changes": [
269275
{
270276
"field": "id",
@@ -295,7 +301,7 @@ insert into test_table(
295301
)
296302
values(
297303
2,
298-
'a value',
304+
'original value set by alice',
299305
'{ "commit": { "message": "some custom message", "author": { "name": "Alice", "email": "[email protected]" } } }'
300306
)
301307
```
@@ -314,14 +320,15 @@ where id = 2
314320
"message": "some custom message\\n\\ntest_table_git_track_trigger: BEFORE INSERT ROW on public.test_table",
315321
"author": "Alice ([email protected])",
316322
"timestamp": "2020-10-23T12:00:00.000Z",
323+
"oid": "[oid]",
317324
"changes": [
318325
{
319326
"field": "id",
320327
"new": 2
321328
},
322329
{
323330
"field": "text",
324-
"new": "a value"
331+
"new": "original value set by alice"
325332
}
326333
]
327334
}
@@ -334,13 +341,13 @@ where id = 2
334341

335342
```sql
336343
update test_table
337-
set text = 'a new value',
344+
set text = 'a new value set by admin',
338345
git = '{ "commit": { "message": "Changed because the previous value was out-of-date" } }'
339346
where id = 2
340347
```
341348

342349
```sql
343-
select git_log(git, depth := 1)
350+
select git_log(git, depth := 2)
344351
from test_table
345352
where id = 2
346353
```
@@ -353,11 +360,28 @@ where id = 2
353360
"message": "Changed because the previous value was out-of-date\\n\\ntest_table_git_track_trigger: BEFORE UPDATE ROW on public.test_table",
354361
"author": "pguser ([email protected])",
355362
"timestamp": "2020-10-23T12:00:00.000Z",
363+
"oid": "[oid]",
364+
"changes": [
365+
{
366+
"field": "text",
367+
"new": "a new value set by admin",
368+
"old": "original value set by alice"
369+
}
370+
]
371+
},
372+
{
373+
"message": "some custom message\\n\\ntest_table_git_track_trigger: BEFORE INSERT ROW on public.test_table",
374+
"author": "Alice ([email protected])",
375+
"timestamp": "2020-10-23T12:00:00.000Z",
376+
"oid": "[oid]",
356377
"changes": [
378+
{
379+
"field": "id",
380+
"new": 2
381+
},
357382
{
358383
"field": "text",
359-
"new": "a new value",
360-
"old": "a value"
384+
"new": "original value set by alice"
361385
}
362386
]
363387
}
@@ -367,6 +391,80 @@ where id = 2
367391
```
368392

369393
By setting `depth := 2`, only the two most recent changes are returned.
394+
395+
### Restoring previous versions
396+
397+
`git_resolve` gives you a json representation of a prior version of a row, which can be used for backup and restore. The first argument is a `git` json value; the second argument is a valid git ref string.
398+
399+
Combine it with `git_log` to get a previous version - the below query uses `->1->>'oid'` to get the oid (as text) from the second item in the log array:
400+
401+
```sql
402+
select git_resolve(git, git_log(git)->1->>'oid')
403+
from test_table
404+
where id = 2
405+
```
406+
407+
```json
408+
[
409+
{
410+
"git_resolve": {
411+
"id": 2,
412+
"text": "original value set by alice"
413+
}
414+
}
415+
]
416+
```
417+
418+
This can be used in an update query to revert a change:
419+
420+
```sql
421+
update test_table set (id, text) =
422+
(
423+
select id, text
424+
from json_populate_record(
425+
null::test_table, (
426+
select git_resolve(git, git_log(git)->1->>'oid')
427+
from test_table
428+
where id = 2
429+
)
430+
)
431+
)
432+
where id = 2
433+
returning id, text
434+
```
435+
436+
```json
437+
[
438+
{
439+
"id": 2,
440+
"text": "original value set by alice"
441+
}
442+
]
443+
```
444+
445+
Or a similar technique can restore a deleted item:
446+
447+
```sql
448+
insert into test_table
449+
select * from json_populate_record(
450+
null::test_table,
451+
(
452+
select git_resolve(git, git_log(git, depth := 1)->0->>'oid')
453+
from deleted_history
454+
where tablename = 'test_table' and identifier->>'id' = '1'
455+
)
456+
)
457+
returning id, text
458+
```
459+
460+
```json
461+
[
462+
{
463+
"id": 1,
464+
"text": "updated content"
465+
}
466+
]
467+
```
370468
<!-- codegen:end -->
371469

372470
## Caveat
@@ -375,6 +473,7 @@ By setting `depth := 1`, only the most recent change is returned.
375473
- It hasn't been performance-tested yet. It works well for rows with small, easily-json-stringifiable data. Large, frequently updated rows may hit issues.
376474
- It currently uses the `JSON` data type to store a serialised copy of the `.git` repo folder. This can likely be optimised to use `BYTEA` or another data type.
377475
- It uses several tools that were _not_ built with each other in mind (although each is well-designed and flexible enough for them to play nice without too many problems). See the [implementation section](#implementation)
476+
- It's still in v0, so breaking changes may occur.
378477

379478
## Implementation
380479

scripts/generate-queries.ts

+7
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ export const getQuery = (js: string) => {
2929
${quotes}
3030
language plv8;
3131
32+
create or replace function git_resolve(git_json json, ref text) returns json as
33+
${quotes}
34+
${getFunctionBody(js)}
35+
return module.exports.gitResolve(git_json, ref)
36+
${quotes}
37+
language plv8;
38+
3239
create or replace function git_log(git_json json, depth int) returns json as
3340
${quotes}
3441
${getFunctionBody(js)}

src/git.ts

+35-3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import * as serializer from './serializer'
55
import {SyncPromise} from './sync-promise'
66
import {PG_Vars} from './pg-types'
77
import {setupMemfs} from './fs'
8+
import {plog} from './pg-log'
89

910
function writeGitFiles(gitFiles: any, fs: memfs.IFs) {
1011
Object.keys(gitFiles).map(filepath => {
@@ -70,6 +71,9 @@ export const rowToRepo = ({OLD, NEW, ...pg}: PG_Vars) => {
7071
}))
7172
}
7273

74+
type TreeInfo = {type: string; content: string; oid: string}
75+
type WalkResult = {filepath: string; ChildInfo: TreeInfo; ParentInfo?: TreeInfo}
76+
7377
/**
7478
* When passed a json object representing the `.git` folder of a repo, returns a list
7579
* of changes made to corresponding row. Optionally, pass `depth` to limit how far back
@@ -79,9 +83,6 @@ export const gitLog = (gitRepoJson: object, depth?: number) => {
7983
const {fs} = setupMemfs()
8084
const repo = {fs, dir: '/repo'}
8185

82-
type TreeInfo = {type: string; content: string; oid: string}
83-
type WalkResult = {filepath: string; ChildInfo: TreeInfo; ParentInfo?: TreeInfo}
84-
8586
return SyncPromise.resolve()
8687
.then(() => writeGitFiles(gitRepoJson, fs))
8788
.then(() => git.log({...repo, depth}))
@@ -103,6 +104,7 @@ export const gitLog = (gitRepoJson: object, depth?: number) => {
103104
message: e.commit.message.trim(),
104105
author: `${e.commit.author.name} (${e.commit.author.email})`,
105106
timestamp: new Date(e.commit.author.timestamp * 1000).toISOString(),
107+
oid: e.oid,
106108
changes: results
107109
.filter(
108110
r => r.ChildInfo?.type === 'blob' && r.filepath !== '.' && r.ChildInfo.oid !== r.ParentInfo?.oid,
@@ -118,12 +120,42 @@ export const gitLog = (gitRepoJson: object, depth?: number) => {
118120
})
119121
}
120122

123+
/**
124+
* Resolves a git ref into a dictionary of values, which can be passed to `INSERT` or `UPDATE` operations
125+
* @param gitRepoJson a json object representing the `.git` folder of a repo
126+
* @param ref a git ref string
127+
*/
128+
export const gitResolve = (gitRepoJson: object, ref: string) => {
129+
const {fs} = setupMemfs()
130+
const repo = {fs, dir: '/repo'}
131+
132+
return SyncPromise.resolve()
133+
.then(() => writeGitFiles(gitRepoJson, fs))
134+
.then(() =>
135+
git.walk({
136+
...repo,
137+
trees: [git.TREE({ref})],
138+
map: (filepath, entries) => resolveTree(entries![0])!.then(tree => ({filepath, tree})),
139+
}),
140+
)
141+
.then((results: Array<{filepath: string; tree: ResolvedTree}>) =>
142+
results
143+
.filter(r => r.tree.type === 'blob' && r.filepath !== '.')
144+
.reduce(
145+
(dict, next) => Object.assign(dict, {[next.filepath]: serializer.parse(next.tree.content)}),
146+
{} as Record<string, any>,
147+
),
148+
)
149+
}
150+
121151
/**
122152
* for some reason A.content() converts from a buffer to {"0": 100, "1": 101} format.
123153
* Object.values(...) converts back to a number array. Wasteful, but works for now.
124154
*/
125155
const btos = (obj: any) => Buffer.from(Object.values<number>(obj || {})).toString()
126156

157+
type PromiseResult<P> = P extends Promise<infer X> ? X : never
158+
type ResolvedTree = PromiseResult<ReturnType<typeof resolveTree>>
127159
/** gets the type, content and oid for a `WalkerEntry` */
128160
const resolveTree = (tree: git.WalkerEntry | undefined) => {
129161
const promises = tree && [tree.type(), tree.content().then(btos), tree.oid()]

src/index.ts

+1
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ export const syncify = <A extends unknown[], R>(func: (...args: A) => Promise<R>
1414

1515
export const rowToRepo = syncify(git.rowToRepo)
1616
export const gitLog = syncify(git.gitLog)
17+
export const gitResolve = syncify(git.gitResolve)

src/pg-log.ts

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ declare const plv8: any, WARNING: any
55
/**
66
* Wrapper for `plv8.elog` which outputs to a `docker-compose up` console.
77
*/
8-
export const plog = (...args: any[]) => {
8+
export const plog = (...args: any[]): undefined => {
99
if (typeof plv8 === 'undefined') return
1010
// inspect is an easy way to pretty-print any value
1111
const s = inspect(args).slice(1, -1).trim()

test/result-printer.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ export const fuzzifyDate = (s: string) => {
99

1010
const gitRepoHashes: string[] = []
1111
/**
12-
* JSON.stringify with a replacer that returns stable values for byte arrays, git repo json representations and timestamps.
12+
* JSON.stringify with a replacer that returns stable values for byte arrays, oids, git repo json representations and timestamps.
1313
* Useful for jest snapshot testing - the result is pretty human readable and stays the same across runs.
1414
*/
1515
export const readableJson = (o: unknown) => {
@@ -43,6 +43,9 @@ export const readableJson = (o: unknown) => {
4343
})
4444
return copy
4545
}
46+
if (k === 'oid' && typeof v === 'string') {
47+
return '[oid]'
48+
}
4649
if (k === 'timestamp' && typeof v === 'string') {
4750
return fuzzifyDate(v).toISOString()
4851
}

0 commit comments

Comments
 (0)