Skip to content

Commit

Permalink
Add import-json and export-json commands for importing and exporting …
Browse files Browse the repository at this point in the history
…fully detailed histories (#271)
  • Loading branch information
ddworken authored Dec 31, 2024
1 parent 7afdc9f commit ffc224e
Show file tree
Hide file tree
Showing 6 changed files with 219 additions and 6 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ A few configuration options:

hiSHtory imports your existing shell history by default. If for some reason this didn't work (e.g. you had your shell history in a non-standard file), you can import it by piping it into `hishtory import` (e.g. `cat ~/.my_history | hishtory import`).

If you'd like to import rich history data (e.g. because you previously tracked other history metadata with another tool), you can use `hishtory import-json`. See `hishtory import-json --help` for more information.

</blockquote></details>

<details>
Expand Down Expand Up @@ -240,7 +242,7 @@ Debug logs are stored in `~/.hishtory/hishtory.log`. If you run into any issues,
<details>
<summary>Uninstalling</summary><blockquote>

If you'd like to uninstall hishtory, just run `hishtory uninstall`. Note that this deletes the SQLite DB storing your history, so consider running a `hishtory export` first.
If you'd like to uninstall hishtory, just run `hishtory uninstall`. Note that this deletes the SQLite DB storing your history, so consider running a `hishtory export` or a `hishtory export-json` first.

Note that if you're experiencing any issues with hiSHtory, try running `hishtory update` first! Performance and reliability are always improving, and we highly value [your feedback](https://github.com/ddworken/hishtory/issues).

Expand Down
35 changes: 35 additions & 0 deletions client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3435,4 +3435,39 @@ func TestStatusFullConfig(t *testing.T) {
testutils.CompareGoldens(t, out, "TestStatusFullConfig")
}

// TestExportJson inserts two synthetic history entries and checks that
// `hishtory export-json` prints them as JSON lines matching the
// TestExportJson golden file.
func TestExportJson(t *testing.T) {
	markTestForSharding(t, 20)
	defer testutils.BackupAndRestore(t)()
	tester := zshTester{}
	installHishtory(t, tester, "")

	// Create some history entries. Only the first entry gets an explicit start
	// time; the second keeps the start time assigned by
	// testutils.MakeFakeHistoryEntry, which is the value the golden file was
	// recorded with.
	//
	// NOTE(review): this test used to assign e1.StartTime a second time after
	// e1 had already been inserted, which had no effect on the DB. If e2 is
	// meant to carry its own start time, set e2.StartTime before db.Create(e2)
	// and regenerate the golden file.
	db := hctx.GetDb(hctx.MakeContext())
	e1 := testutils.MakeFakeHistoryEntry("echo synth1")
	e1.StartTime = time.Unix(1234567, 0)
	require.NoError(t, db.Create(e1).Error)
	e2 := testutils.MakeFakeHistoryEntry("echo synth2")
	require.NoError(t, db.Create(e2).Error)

	// Run export-json, keeping only the synthetic entries and dropping any
	// line that mentions export-json
	out := tester.RunInteractiveShell(t, `hishtory export-json | grep synth | grep -v export-json`)
	testutils.CompareGoldens(t, out, "TestExportJson")
}

// TestImportJson pipes the TestExportJson golden file into
// `hishtory import-json` and then checks that a subsequent export-json
// reproduces that same golden output.
func TestImportJson(t *testing.T) {
	markTestForSharding(t, 20)
	defer testutils.BackupAndRestore(t)()
	shell := zshTester{}
	installHishtory(t, shell, "")

	// Import the entries stored in the export-json golden and confirm the
	// reported count
	importOutput := shell.RunInteractiveShell(t, `cat client/testdata/TestExportJson | hishtory import-json`)
	require.Equal(t, "Imported 2 history entries\n", importOutput)

	// Export again and compare against the golden that was just imported
	exportOutput := shell.RunInteractiveShell(t, `hishtory export-json | grep synth | grep -v export-json`)
	testutils.CompareGoldens(t, exportOutput, "TestExportJson")
}

// TODO: somehow test/confirm that hishtory works even if only bash/only zsh is installed
76 changes: 76 additions & 0 deletions client/cmd/export.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
package cmd

import (
"context"
"encoding/json"
"fmt"
"io"
"os"

"github.com/ddworken/hishtory/client/data"
"github.com/ddworken/hishtory/client/hctx"
"github.com/ddworken/hishtory/client/lib"

"github.com/spf13/cobra"
)

// exportJsonCmd is the `hishtory export-json` subcommand. It streams the
// output of exportToJson to stdout and exits fatally if the export fails.
var exportJsonCmd = &cobra.Command{
	Use:   "export-json",
	Short: "Export history entries formatted in JSON lines format (as accepted by hishtory import-json, and easily parsable by other tools)",
	Run: func(_ *cobra.Command, _ []string) {
		lib.CheckFatalError(exportToJson(hctx.MakeContext(), os.Stdout))
	},
}

// structToMap turns a history entry into a generic map by marshalling it to
// JSON and decoding that JSON back into a map, so the resulting keys are the
// entry's JSON field names. This lets callers drop individual fields by key.
func structToMap(entry data.HistoryEntry) (map[string]interface{}, error) {
	encoded, marshalErr := json.Marshal(entry)
	if marshalErr != nil {
		return nil, marshalErr
	}
	var fields map[string]interface{}
	decodeErr := json.Unmarshal(encoded, &fields)
	return fields, decodeErr
}

// exportToJson writes history entries to w in JSON lines format: one JSON
// object per entry, each followed by a newline.
//
// Entries are fetched page by page via lib.SearchWithOffset with an empty
// query until a page comes back empty. The device_id and entry_id keys are
// removed from every object before it is written.
//
// It returns the first error hit while searching, converting, or writing.
func exportToJson(ctx context.Context, w io.Writer) error {
	const chunkSize = 1000
	db := hctx.GetDb(ctx)
	// Encoder.Encode emits the object and its trailing newline in a single
	// Write call, so a record is never split across two writes.
	enc := json.NewEncoder(w)
	for offset := 0; ; offset += chunkSize {
		entries, err := lib.SearchWithOffset(ctx, db, "", chunkSize, offset)
		if err != nil {
			return fmt.Errorf("failed to search for history entries with offset=%d: %w", offset, err)
		}
		if len(entries) == 0 {
			return nil
		}
		for _, entry := range entries {
			m, err := structToMap(*entry)
			if err != nil {
				return fmt.Errorf("failed to convert history entry to a JSON object: %w", err)
			}
			delete(m, "device_id")
			delete(m, "entry_id")
			if err := enc.Encode(m); err != nil {
				return fmt.Errorf("failed to write history entry as JSON: %w", err)
			}
		}
	}
}

// init registers the export-json subcommand on the root command.
func init() {
	rootCmd.AddCommand(exportJsonCmd)
}
98 changes: 97 additions & 1 deletion client/cmd/import.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
package cmd

import (
"context"
"encoding/json"
"fmt"
"os"
"os/user"
"time"

"github.com/ddworken/hishtory/client/data"
"github.com/ddworken/hishtory/client/hctx"
"github.com/ddworken/hishtory/client/lib"

"github.com/google/uuid"
"github.com/spf13/cobra"
)

var importCmd = &cobra.Command{
Use: "import",
Hidden: true,
Short: "Re-import history entries from your existing shell history",
Long: "Note that you must pipe commands to be imported in via stdin. For example `history | hishtory import`.",
Long: "Note that you may also pipe commands to be imported in via stdin. For example `history | hishtory import`.",
Run: func(cmd *cobra.Command, args []string) {
ctx := hctx.MakeContext()
numImported, err := lib.ImportHistory(ctx, true, true)
Expand All @@ -24,6 +31,95 @@ var importCmd = &cobra.Command{
},
}

// importJsonCmd is the `hishtory import-json` subcommand. It runs
// importFromJson, exits fatally on error, and otherwise prints how many
// entries were imported.
var importJsonCmd = &cobra.Command{
	Use:   "import-json",
	Short: "Import history entries formatted in JSON lines format into hiSHtory",
	Long: "Data is read from stdin. For example: `cat data.txt | hishtory import-json`.\n\nExample JSON format:\n\n```\n" +
		"{\"command\":\"echo foo\"}\n" +
		"{\"command\":\"echo bar\", \"current_working_directory\": \"/tmp/\"}\n" +
		"{\"command\":\"ls\",\"current_working_directory\":\"/tmp/\",\"local_username\":\"david\",\"hostname\":\"foo\",\"home_directory\":\"/Users/david\",\"exit_code\":0,\"start_time\":\"2024-12-30T01:14:34.656407Z\",\"end_time\":\"2024-12-30T01:14:34.657407Z\"}\n```\n",
	Run: func(_ *cobra.Command, _ []string) {
		count, importErr := importFromJson(hctx.MakeContext())
		lib.CheckFatalError(importErr)
		fmt.Printf("Imported %v history entries\n", count)
	},
}

// importFromJson reads JSON lines from stdin (via lib.ReadStdin), decodes each
// line into a history entry, fills in any missing fields, and inserts the
// entries into the DB in batches of lib.ImportBatchSize.
//
// Every line must decode to an entry with a non-empty command and no custom
// columns; otherwise the import is aborted before anything is inserted.
// Missing fields are filled as follows:
//   - local username, hostname and home directory: values for the current
//     user and machine
//   - current working directory: "Unknown"
//   - start/end time: if both are missing, synthetic timestamps based on the
//     import time that increase with the line index; if only one is missing,
//     it is placed one millisecond away from the one that was supplied so the
//     entry keeps a sensible duration
//
// The device ID and entry ID are always overwritten with this device's ID and
// a fresh per-import ID.
//
// It returns the number of entries imported.
func importFromJson(ctx context.Context) (int, error) {
	// Get the data needed for filling in any missing columns
	currentUser, err := user.Current()
	if err != nil {
		return 0, err
	}
	hostname, err := os.Hostname()
	if err != nil {
		return 0, err
	}
	homedir := hctx.GetHome(ctx)

	// Build the entries
	lines, err := lib.ReadStdin()
	if err != nil {
		return 0, fmt.Errorf("failed to read stdin for import: %w", err)
	}
	entries := make([]data.HistoryEntry, 0, len(lines))
	deviceId := hctx.GetConf(ctx).DeviceId
	importEntryId := uuid.Must(uuid.NewRandom()).String()
	importTimestamp := time.Now().UTC()
	for i, line := range lines {
		var entry data.HistoryEntry
		err := json.Unmarshal([]byte(line), &entry)
		if err != nil {
			return 0, fmt.Errorf("failed to parse JSON line %#v: %w", line, err)
		}
		if entry.Command == "" {
			return 0, fmt.Errorf("cannot import history entries without a command, JSON line: %#v", line)
		}
		if len(entry.CustomColumns) > 0 {
			return 0, fmt.Errorf("cannot import history entries with custom columns, JSON line: %#v", line)
		}
		if entry.LocalUsername == "" {
			entry.LocalUsername = currentUser.Username
		}
		if entry.Hostname == "" {
			entry.Hostname = hostname
		}
		if entry.CurrentWorkingDirectory == "" {
			entry.CurrentWorkingDirectory = "Unknown"
		}
		if entry.HomeDirectory == "" {
			entry.HomeDirectory = homedir
		}
		switch startMissing, endMissing := entry.StartTime.IsZero(), entry.EndTime.IsZero(); {
		case startMissing && endMissing:
			// Set the timestamps so that they are monotonically increasing
			entry.StartTime = importTimestamp.Add(time.Millisecond * time.Duration(i*2))
			entry.EndTime = entry.StartTime.Add(time.Millisecond)
		case startMissing:
			// Derive the missing side from the supplied one rather than from
			// the import clock, which could land after the end time.
			entry.StartTime = entry.EndTime.Add(-time.Millisecond)
		case endMissing:
			// Same reasoning: an end time taken from the import clock could be
			// years after a historical start time.
			entry.EndTime = entry.StartTime.Add(time.Millisecond)
		}
		entry.DeviceId = deviceId
		entry.EntryId = fmt.Sprintf("%s-%d", importEntryId, i)
		entries = append(entries, entry)
	}

	// Insert the entries into the DB
	db := hctx.GetDb(ctx)
	err = db.CreateInBatches(entries, lib.ImportBatchSize).Error
	if err != nil {
		return 0, fmt.Errorf("failed to insert entries into DB: %w", err)
	}

	// Trigger a checkpoint so that these bulk entries are added from the WAL to the main DB
	err = db.Exec("PRAGMA wal_checkpoint").Error
	if err != nil {
		return 0, fmt.Errorf("failed to checkpoint imported history: %w", err)
	}
	return len(entries), nil
}

// init registers the import and import-json subcommands on the root command.
func init() {
	rootCmd.AddCommand(importCmd)
	rootCmd.AddCommand(importJsonCmd)
}
10 changes: 6 additions & 4 deletions client/lib/lib.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ var (
GitCommit string = "Unknown"
)

// ImportBatchSize is the batch size for the DB operations for importing history.
// Used by all types of imports.
var ImportBatchSize = 100

// 512KB ought to be enough for any reasonable cmd
// Funnily enough, 256KB actually wasn't enough. See https://github.com/ddworken/hishtory/issues/93
var maxSupportedLineLengthForImport = 512_000
Expand Down Expand Up @@ -235,7 +238,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
return 0, fmt.Errorf("failed to count input lines during hishtory import: %w", err)
}
if shouldReadStdin {
extraEntries, err := readStdin()
extraEntries, err := ReadStdin()
if err != nil {
return 0, fmt.Errorf("failed to read stdin: %w", err)
}
Expand All @@ -260,7 +263,6 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
var iteratorError error = nil
var batch []data.HistoryEntry
importTimestamp := time.Now().UTC()
batchSize := 100
importEntryId := uuid.Must(uuid.NewRandom()).String()
var bar *progressbar.ProgressBar
if totalNumEntries > NUM_IMPORTED_ENTRIES_SLOW {
Expand Down Expand Up @@ -296,7 +298,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
EntryId: entryId,
})
batch = append(batch, entry)
if len(batch) > batchSize {
if len(batch) > ImportBatchSize {
err = RetryingDbFunction(func() error {
if err := db.Create(batch).Error; err != nil {
return fmt.Errorf("failed to import batch of history entries: %w", err)
Expand Down Expand Up @@ -347,7 +349,7 @@ func ImportHistory(ctx context.Context, shouldReadStdin, force bool) (int, error
return numEntriesImported, nil
}

func readStdin() ([]string, error) {
func ReadStdin() ([]string, error) {
ret := make([]string, 0)
in := bufio.NewReader(os.Stdin)
for {
Expand Down
2 changes: 2 additions & 0 deletions client/testdata/TestExportJson
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"command":"echo synth2","current_working_directory":"/tmp/","custom_columns":null,"end_time":"2022-10-18T04:43:24Z","exit_code":2,"home_directory":"/home/david/","hostname":"localhost","local_username":"david","start_time":"2022-10-18T04:43:21Z"}
{"command":"echo synth1","current_working_directory":"/tmp/","custom_columns":null,"end_time":"2022-10-18T04:43:19Z","exit_code":2,"home_directory":"/home/david/","hostname":"localhost","local_username":"david","start_time":"1970-01-14T22:56:07-08:00"}

0 comments on commit ffc224e

Please sign in to comment.