Skip to content

Commit e7887b6

Browse files
committed
⚡ Reduce startup time when sync is enabled siyuan-note/siyuan#13589
1 parent b6410d9 commit e7887b6

File tree

4 files changed

+152
-72
lines changed

4 files changed

+152
-72
lines changed

go.mod

+2
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ require (
5656
github.com/quic-go/qpack v0.5.1 // indirect
5757
github.com/quic-go/quic-go v0.48.2 // indirect
5858
github.com/refraction-networking/utls v1.6.7 // indirect
59+
github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect
60+
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
5961
go.uber.org/mock v0.5.0 // indirect
6062
golang.org/x/crypto v0.31.0 // indirect
6163
golang.org/x/exp v0.0.0-20241217172543-b2144cdd0a67 // indirect

go.sum

+5
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,11 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT
139139
github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
140140
github.com/studio-b12/gowebdav v0.9.0 h1:1j1sc9gQnNxbXXM4M/CebPOX4aXYtr7MojAVcN4dHjU=
141141
github.com/studio-b12/gowebdav v0.9.0/go.mod h1:bHA7t77X/QFExdeAnDzK6vKM34kEZAcE1OX4MfiwjkE=
142+
github.com/vmihailenco/msgpack v4.0.4/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
143+
github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU=
144+
github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc=
145+
github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g=
146+
github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds=
142147
github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU=
143148
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
144149
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=

ref.go

+67-1
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@ import (
2020
"errors"
2121
"os"
2222
"path/filepath"
23+
"time"
2324

25+
"github.com/88250/go-humanize"
2426
"github.com/88250/gulu"
2527
"github.com/siyuan-note/dejavu/entity"
2628
"github.com/siyuan-note/filelock"
2729
"github.com/siyuan-note/logging"
30+
"github.com/vmihailenco/msgpack/v5"
2831
)
2932

3033
var ErrNotFoundIndex = errors.New("not found index")
@@ -51,7 +54,16 @@ func (repo *Repo) Latest() (ret *entity.Index, err error) {
5154
return
5255
}
5356

57+
// FullIndex 描述了完整的索引结构。
58+
type FullIndex struct {
59+
ID string `json:"id"`
60+
Files []*entity.File `json:"files"`
61+
Spec int `json:"spec"`
62+
}
63+
5464
func (repo *Repo) UpdateLatest(index *entity.Index) (err error) {
65+
start := time.Now()
66+
5567
refs := filepath.Join(repo.Path, "refs")
5668
err = os.MkdirAll(refs, 0755)
5769
if nil != err {
@@ -61,7 +73,61 @@ func (repo *Repo) UpdateLatest(index *entity.Index) (err error) {
6173
if nil != err {
6274
return
6375
}
64-
logging.LogInfof("updated local latest to [%s]", index.String())
76+
77+
fullLatestPath := filepath.Join(repo.Path, "full-latest.json")
78+
files, err := repo.GetFiles(index)
79+
if nil != err {
80+
return
81+
}
82+
83+
fullIndex := &FullIndex{ID: index.ID, Files: files, Spec: 0}
84+
data, err := msgpack.Marshal(fullIndex)
85+
if nil != err {
86+
return
87+
}
88+
err = gulu.File.WriteFileSafer(fullLatestPath, data, 0644)
89+
if nil != err {
90+
return
91+
}
92+
93+
logging.LogInfof("updated local latest to [%s], full latest [size=%s], cost [%s]", index.String(), humanize.Bytes(uint64(len(data))), time.Since(start))
94+
return
95+
}
96+
97+
func (repo *Repo) getFullLatest(latest *entity.Index) (ret *FullIndex) {
98+
start := time.Now()
99+
100+
fullLatestPath := filepath.Join(repo.Path, "full-latest.json")
101+
if !gulu.File.IsExist(fullLatestPath) {
102+
return
103+
}
104+
105+
data, err := os.ReadFile(fullLatestPath)
106+
if nil != err {
107+
logging.LogErrorf("read full latest failed: %s", err)
108+
return
109+
}
110+
111+
ret = &FullIndex{}
112+
if err = msgpack.Unmarshal(data, ret); nil != err {
113+
logging.LogErrorf("unmarshal full latest [%s] failed: %s", fullLatestPath, err)
114+
ret = nil
115+
if err = os.RemoveAll(fullLatestPath); nil != err {
116+
logging.LogErrorf("remove full latest [%s] failed: %s", fullLatestPath, err)
117+
}
118+
return
119+
}
120+
121+
if ret.ID != latest.ID {
122+
logging.LogErrorf("full latest ID [%s] not match latest ID [%s]", ret.ID, latest.ID)
123+
ret = nil
124+
if err = os.RemoveAll(fullLatestPath); nil != err {
125+
logging.LogErrorf("remove full latest [%s] failed: %s", fullLatestPath, err)
126+
}
127+
return
128+
}
129+
130+
logging.LogInfof("got local full latest [size=%s], cost [%s]", humanize.Bytes(uint64(len(data))), time.Since(start))
65131
return
66132
}
67133

repo.go

+78-71
Original file line numberDiff line numberDiff line change
@@ -690,90 +690,96 @@ func (repo *Repo) index0(memo string, checkChunks bool, context map[string]inter
690690
init = true
691691
}
692692

693-
var workerErrs []error
694-
workerErrLock := sync.Mutex{}
695693
var upserts, removes, latestFiles []*entity.File
696-
if !init {
697-
start = time.Now()
698-
count := atomic.Int32{}
699-
total := len(files)
700-
eventbus.Publish(eventbus.EvtIndexBeforeGetLatestFiles, context, total)
701-
lock := &sync.Mutex{}
702-
waitGroup := &sync.WaitGroup{}
703-
p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) {
704-
defer waitGroup.Done()
705-
706-
count.Add(1)
707-
eventbus.Publish(eventbus.EvtIndexGetLatestFile, context, int(count.Load()), total)
708-
709-
fileID := arg.(string)
710-
file, getErr := repo.store.GetFile(fileID)
711-
if nil != getErr {
712-
logging.LogErrorf("get file [%s] failed: %s", fileID, getErr)
713-
workerErrLock.Lock()
714-
workerErrs = append(workerErrs, ErrRepoFatal)
715-
workerErrLock.Unlock()
716-
return
717-
}
718-
719-
lock.Lock()
720-
latestFiles = append(latestFiles, file)
721-
lock.Unlock()
694+
fullLatest := repo.getFullLatest(latest)
695+
if nil != fullLatest {
696+
latestFiles = fullLatest.Files
697+
} else {
698+
var workerErrs []error
699+
workerErrLock := sync.Mutex{}
700+
if !init {
701+
start = time.Now()
702+
count := atomic.Int32{}
703+
total := len(files)
704+
eventbus.Publish(eventbus.EvtIndexBeforeGetLatestFiles, context, total)
705+
lock := &sync.Mutex{}
706+
waitGroup := &sync.WaitGroup{}
707+
p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) {
708+
defer waitGroup.Done()
709+
710+
count.Add(1)
711+
eventbus.Publish(eventbus.EvtIndexGetLatestFile, context, int(count.Load()), total)
712+
713+
fileID := arg.(string)
714+
file, getErr := repo.store.GetFile(fileID)
715+
if nil != getErr {
716+
logging.LogErrorf("get file [%s] failed: %s", fileID, getErr)
717+
workerErrLock.Lock()
718+
workerErrs = append(workerErrs, ErrRepoFatal)
719+
workerErrLock.Unlock()
720+
return
721+
}
722722

723-
if checkChunks { // 仅在非移动端校验,因为移动端私有数据空间不会存在外部操作导致分块损坏的情况 https://github.com/siyuan-note/siyuan/issues/13216
724-
// Check local data chunk integrity before data synchronization https://github.com/siyuan-note/siyuan/issues/8853
725-
for _, chunk := range file.Chunks {
726-
info, statErr := repo.store.Stat(chunk)
727-
if nil == statErr {
728-
continue
729-
}
723+
lock.Lock()
724+
latestFiles = append(latestFiles, file)
725+
lock.Unlock()
730726

731-
if nil != info {
732-
logging.LogWarnf("stat file [%s, %s, %s, %d] chunk [%s, perm=%04o] failed: %s",
733-
file.ID, file.Path, time.UnixMilli(file.Updated).Format("2006-01-02 15:04:05"), file.Size, chunk, info.Mode().Perm(), statErr)
734-
} else {
735-
logging.LogWarnf("stat file [%s, %s, %s, %d] chunk [%s] failed: %s",
736-
file.ID, file.Path, time.UnixMilli(file.Updated).Format("2006-01-02 15:04:05"), file.Size, chunk, statErr)
737-
}
727+
if checkChunks { // 仅在非移动端校验,因为移动端私有数据空间不会存在外部操作导致分块损坏的情况 https://github.com/siyuan-note/siyuan/issues/13216
728+
// Check local data chunk integrity before data synchronization https://github.com/siyuan-note/siyuan/issues/8853
729+
for _, chunk := range file.Chunks {
730+
info, statErr := repo.store.Stat(chunk)
731+
if nil == statErr {
732+
continue
733+
}
738734

739-
if errors.Is(statErr, os.ErrPermission) {
740-
// 如果是权限问题,则尝试修改权限,不认为是分块文件损坏
741-
// Improve checking local data chunk integrity before data sync https://github.com/siyuan-note/siyuan/issues/9688
742-
if chmodErr := os.Chmod(chunk, 0644); nil != chmodErr {
743-
logging.LogWarnf("chmod file [%s] failed: %s", chunk, chmodErr)
735+
if nil != info {
736+
logging.LogWarnf("stat file [%s, %s, %s, %d] chunk [%s, perm=%04o] failed: %s",
737+
file.ID, file.Path, time.UnixMilli(file.Updated).Format("2006-01-02 15:04:05"), file.Size, chunk, info.Mode().Perm(), statErr)
744738
} else {
745-
logging.LogInfof("chmod file [%s] to [0644]", chunk)
739+
logging.LogWarnf("stat file [%s, %s, %s, %d] chunk [%s] failed: %s",
740+
file.ID, file.Path, time.UnixMilli(file.Updated).Format("2006-01-02 15:04:05"), file.Size, chunk, statErr)
746741
}
747-
continue
748-
}
749742

750-
if errors.Is(statErr, os.ErrNotExist) {
751-
workerErrLock.Lock()
752-
workerErrs = append(workerErrs, ErrRepoFatal)
753-
workerErrLock.Unlock()
754-
return
743+
if errors.Is(statErr, os.ErrPermission) {
744+
// 如果是权限问题,则尝试修改权限,不认为是分块文件损坏
745+
// Improve checking local data chunk integrity before data sync https://github.com/siyuan-note/siyuan/issues/9688
746+
if chmodErr := os.Chmod(chunk, 0644); nil != chmodErr {
747+
logging.LogWarnf("chmod file [%s] failed: %s", chunk, chmodErr)
748+
} else {
749+
logging.LogInfof("chmod file [%s] to [0644]", chunk)
750+
}
751+
continue
752+
}
753+
754+
if errors.Is(statErr, os.ErrNotExist) {
755+
workerErrLock.Lock()
756+
workerErrs = append(workerErrs, ErrRepoFatal)
757+
workerErrLock.Unlock()
758+
return
759+
}
755760
}
756761
}
762+
})
763+
764+
for _, f := range latest.Files {
765+
waitGroup.Add(1)
766+
err = p.Invoke(f)
767+
if nil != err {
768+
logging.LogErrorf("invoke failed: %s", err)
769+
return
770+
}
757771
}
758-
})
759-
760-
for _, f := range latest.Files {
761-
waitGroup.Add(1)
762-
err = p.Invoke(f)
763-
if nil != err {
764-
logging.LogErrorf("invoke failed: %s", err)
772+
waitGroup.Wait()
773+
p.Release()
774+
logging.LogInfof("get latest files [files=%d] cost [%s]", len(latestFiles), time.Since(start))
775+
if 0 < len(workerErrs) {
776+
err = workerErrs[0]
777+
logging.LogErrorf("get latest files failed: %s", err)
765778
return
766779
}
767780
}
768-
waitGroup.Wait()
769-
p.Release()
770-
logging.LogInfof("get latest files [files=%d] cost [%s]", len(latestFiles), time.Since(start))
771-
if 0 < len(workerErrs) {
772-
err = workerErrs[0]
773-
logging.LogErrorf("get latest files failed: %s", err)
774-
return
775-
}
776781
}
782+
777783
upserts, removes = repo.diffUpsertRemove(files, latestFiles, false)
778784
if 1 > len(upserts) && 1 > len(removes) {
779785
ret = latest
@@ -795,7 +801,8 @@ func (repo *Repo) index0(memo string, checkChunks bool, context map[string]inter
795801

796802
count := atomic.Int32{}
797803
total := len(upserts)
798-
workerErrs = nil
804+
var workerErrs []error
805+
workerErrLock := sync.Mutex{}
799806
eventbus.Publish(eventbus.EvtIndexUpsertFiles, context, total)
800807
waitGroup := &sync.WaitGroup{}
801808
p, _ := ants.NewPoolWithFunc(4, func(arg interface{}) {

0 commit comments

Comments
 (0)