Skip to content

Commit f867c5a

Browse files
[CAS] OnDisk update/cleanup. NFCI.
Cleanup for code upstreaming. Noticeable changes: * Hide some forward declared types. * Rename all the database files and use a unified version across index and database file. This allows resetting the version without introducing downstream incompatibility. * Change unit-test to properly set a smaller file size and just not building OnDiskCAS test.
1 parent e0282d7 commit f867c5a

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+461
-380
lines changed

clang/test/CAS/daemon-cas-recovery.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
/// Construct a malformed CAS to recover from.
66
// RUN: echo "abc" | llvm-cas --cas %t/cas --make-blob --data -
7-
// RUN: rm %t/cas/v1.1/v11.data
7+
// RUN: rm %t/cas/v1.1/data.v1
88
// RUN: not llvm-cas --cas %t/cas --validate --check-hash
99

1010
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_FORCE_VALIDATION=1 %clang-cache \

clang/test/CAS/depscan-cas-log.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
1111
// RUN: FileCheck %s --input-file %t/cas/v1.log
1212

13-
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
13+
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
1414
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie
1515

1616
// Even a minimal compilation involves at least 9 records for the cache key.
1717
// CHECK-COUNT-9: [[PID1]] {{[0-9]*}}: create record
1818

19-
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
20-
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v{{[0-9]+}}.index'
19+
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
20+
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}index.v{{[0-9]+}}'

clang/test/CAS/validate-once.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: rm -rf %t
22

33
// RUN: llvm-cas --cas %t/cas --ingest %s
4-
// RUN: mv %t/cas/v1.1/v11.data %t/cas/v1.1/v11.data.bak
4+
// RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak
55

66
// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
77
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \

llvm/include/llvm/CAS/OnDiskDataAllocator.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,7 @@ class OnDiskDataAllocator {
3131
public:
3232
using ValueProxy = MutableArrayRef<char>;
3333

34-
/// An iterator-like return value for data insertion. Maybe it should be
35-
/// called \c iterator, but it has no increment.
34+
/// A pointer to data stored on disk.
3635
class OnDiskPtr {
3736
public:
3837
FileOffset getOffset() const { return Offset; }
@@ -56,15 +55,17 @@ class OnDiskDataAllocator {
5655
ValueProxy Value;
5756
};
5857

59-
/// Look up the data stored at the given offset.
58+
/// Get the data of \p Size stored at the given \p Offset. Note the allocator
59+
/// doesn't keep track of the allocation size, thus \p Size doesn't need to
60+
/// match the size of allocation but needs to be smaller.
6061
Expected<ArrayRef<char>> get(FileOffset Offset, size_t Size) const;
6162

6263
/// Allocate at least \p Size with 8-byte alignment.
6364
Expected<OnDiskPtr> allocate(size_t Size);
6465

6566
/// \returns the buffer that was allocated at \p create time, with size
6667
/// \p UserHeaderSize.
67-
MutableArrayRef<uint8_t> getUserHeader();
68+
MutableArrayRef<uint8_t> getUserHeader() const;
6869

6970
size_t size() const;
7071
size_t capacity() const;

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 56 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
1-
//===- OnDiskGraphDB.h ------------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length
11+
/// hash. This is the class that implements the database storage scheme without
12+
/// exposing the hashing algorithm.
13+
//
14+
//===----------------------------------------------------------------------===//
815

916
#ifndef LLVM_CAS_ONDISKGRAPHDB_H
1017
#define LLVM_CAS_ONDISKGRAPHDB_H
@@ -16,16 +23,13 @@
1623

1724
namespace llvm::cas::ondisk {
1825

19-
/// 8B reference.
26+
/// Standard 8 byte reference inside OnDiskGraphDB.
2027
class InternalRef {
2128
public:
22-
FileOffset getFileOffset() const { return FileOffset(getRawOffset()); }
23-
29+
FileOffset getFileOffset() const { return FileOffset(Data); }
2430
uint64_t getRawData() const { return Data; }
25-
uint64_t getRawOffset() const { return Data; }
2631

2732
static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); }
28-
2933
static InternalRef getFromOffset(FileOffset Offset) {
3034
return InternalRef(Offset.get());
3135
}
@@ -40,19 +44,17 @@ class InternalRef {
4044
uint64_t Data;
4145
};
4246

43-
/// 4B reference.
47+
/// Compact 4 byte reference inside OnDiskGraphDB for smaller references.
4448
class InternalRef4B {
4549
public:
4650
FileOffset getFileOffset() const { return FileOffset(Data); }
47-
4851
uint32_t getRawData() const { return Data; }
4952

5053
/// Shrink to 4B reference.
5154
static std::optional<InternalRef4B> tryToShrink(InternalRef Ref) {
52-
uint64_t Offset = Ref.getRawOffset();
55+
uint64_t Offset = Ref.getRawData();
5356
if (Offset > UINT32_MAX)
5457
return std::nullopt;
55-
5658
return InternalRef4B(Offset);
5759
}
5860

@@ -148,10 +150,9 @@ class InternalRefArrayRef {
148150
if (is4B()) {
149151
auto *B = cast<const InternalRef4B *>(Begin);
150152
return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size);
151-
} else {
152-
auto *B = cast<const InternalRef *>(Begin);
153-
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
154153
}
154+
auto *B = cast<const InternalRef *>(Begin);
155+
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
155156
}
156157

157158
InternalRefArrayRef(std::nullopt_t = std::nullopt) {
@@ -172,8 +173,6 @@ class InternalRefArrayRef {
172173
size_t Size = 0;
173174
};
174175

175-
struct OnDiskContent;
176-
177176
/// Reference to a node. The node's data may not be stored in the database.
178177
/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance
179178
/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not
@@ -199,11 +198,11 @@ class ObjectID {
199198
/// Handle for a loaded node object.
200199
class ObjectHandle {
201200
public:
201+
explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {}
202202
uint64_t getOpaqueData() const { return Opaque; }
203203

204-
static ObjectHandle fromOpaqueData(uint64_t Opaque) {
205-
return ObjectHandle(Opaque);
206-
}
204+
static ObjectHandle fromFileOffset(FileOffset Offset);
205+
static ObjectHandle fromMemory(uintptr_t Ptr);
207206

208207
friend bool operator==(const ObjectHandle &LHS, const ObjectHandle &RHS) {
209208
return LHS.Opaque == RHS.Opaque;
@@ -213,10 +212,10 @@ class ObjectHandle {
213212
}
214213

215214
private:
216-
explicit ObjectHandle(uint64_t Opaque) : Opaque(Opaque) {}
217215
uint64_t Opaque;
218216
};
219217

218+
/// Iterator for ObjectID.
220219
class object_refs_iterator
221220
: public iterator_facade_base<object_refs_iterator,
222221
std::random_access_iterator_tag, ObjectID> {
@@ -294,6 +293,7 @@ class OnDiskGraphDB {
294293
/// \returns the data part of the provided object handle.
295294
ArrayRef<char> getObjectData(ObjectHandle Node) const;
296295

296+
/// \returns the objects referenced by the provided object handle.
297297
object_refs_range getObjectRefs(ObjectHandle Node) const {
298298
InternalRefArrayRef Refs = getInternalRefs(Node);
299299
return make_range(Refs.begin(), Refs.end());
@@ -315,6 +315,13 @@ class OnDiskGraphDB {
315315
/// Hashing function type for validation.
316316
using HashingFuncT = function_ref<void(
317317
ArrayRef<ArrayRef<uint8_t>>, ArrayRef<char>, SmallVectorImpl<uint8_t> &)>;
318+
319+
/// Validate the OnDiskGraphDB.
320+
///
321+
/// \param Deep if true, rehash all the objects to ensure no data
322+
/// corruption in stored objects, otherwise just validate the structure of
323+
/// CAS database.
324+
/// \param Hasher is the hashing function used for objects inside CAS.
318325
Error validate(bool Deep, HashingFuncT Hasher) const;
319326

320327
/// How to fault-in nodes if an upstream database is used.
@@ -347,19 +354,20 @@ class OnDiskGraphDB {
347354
~OnDiskGraphDB();
348355

349356
private:
357+
/// Forward declaration for a proxy for an ondisk index record.
350358
struct IndexProxy;
351-
class TempFile;
352-
class MappedTempFile;
353359

354360
enum class ObjectPresence {
355361
Missing,
356362
InPrimaryDB,
357363
OnlyInUpstreamDB,
358364
};
359365

366+
/// Check if object exists and if it is on upstream only.
360367
Expected<ObjectPresence> getObjectPresence(ObjectID Ref,
361368
bool CheckUpstream) const;
362369

370+
/// \returns true if object can be found in database.
363371
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
364372
auto Presence = getObjectPresence(Ref, CheckUpstream);
365373
if (!Presence) {
@@ -379,46 +387,57 @@ class OnDiskGraphDB {
379387
/// When \p load is called for a node that doesn't exist, this function tries
380388
/// to load it from the upstream store and copy it to the primary one.
381389
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);
390+
391+
/// Import the entire tree from upstream with \p UpstreamNode as root.
382392
Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode);
393+
/// Import only the \p UpstreamNode.
383394
Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode);
384395

396+
/// Find the IndexProxy for the hash.
385397
Expected<IndexProxy> indexHash(ArrayRef<uint8_t> Hash);
386398

399+
/// Get path for creating standalone data file.
400+
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
401+
SmallVectorImpl<char> &Path) const;
402+
/// Create a standalone leaf file.
387403
Error createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data);
388404

389-
Expected<MappedTempFile> createTempFile(StringRef FinalPath, uint64_t Size);
390-
391-
OnDiskContent getContentFromHandle(ObjectHandle H) const;
392-
405+
/// \name Helper functions for internal data structures.
406+
/// \{
393407
static InternalRef getInternalRef(ObjectID Ref) {
394408
return InternalRef::getFromRawData(Ref.getOpaqueData());
395409
}
410+
396411
static ObjectID getExternalReference(InternalRef Ref) {
397412
return ObjectID::fromOpaqueData(Ref.getRawData());
398413
}
399414

400415
static ObjectID getExternalReference(const IndexProxy &I);
401416

402-
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
403-
SmallVectorImpl<char> &Path) const;
417+
static InternalRef makeInternalRef(FileOffset IndexOffset);
404418

405419
Expected<ArrayRef<uint8_t>> getDigest(InternalRef Ref) const;
420+
406421
ArrayRef<uint8_t> getDigest(const IndexProxy &I) const;
407422

408423
Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const;
409424

410-
static InternalRef makeInternalRef(FileOffset IndexOffset);
411-
412425
IndexProxy
413426
getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const;
414427

415428
InternalRefArrayRef getInternalRefs(ObjectHandle Node) const;
429+
/// \}
416430

417-
void recordStandaloneSizeIncrease(size_t SizeIncrease);
431+
/// Get the atomic variable that keeps track of the standalone data storage
432+
/// size.
433+
std::atomic<uint64_t> &standaloneStorageSize() const;
418434

419-
std::atomic<uint64_t> &getStandaloneStorageSize();
435+
/// Increase the standalone data size.
436+
void recordStandaloneSizeIncrease(size_t SizeIncrease);
437+
/// Get the standalone data size.
420438
uint64_t getStandaloneStorageSize() const;
421439

440+
// Private constructor.
422441
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
423442
OnDiskDataAllocator DataPool,
424443
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
@@ -434,14 +453,19 @@ class OnDiskGraphDB {
434453
/// Data type is DataRecordHandle.
435454
OnDiskDataAllocator DataPool;
436455

437-
void *StandaloneData; // a StandaloneDataMap.
456+
/// A StandaloneDataMap.
457+
void *StandaloneData = nullptr;
438458

459+
/// Path to the root directory.
439460
std::string RootPath;
440461

441462
/// Optional on-disk store to be used for faulting-in nodes.
442463
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
464+
465+
/// The policy used to fault in data from upstream.
443466
FaultInPolicy FIPolicy;
444467

468+
/// Debug Logger.
445469
std::shared_ptr<OnDiskCASLogger> Logger;
446470
};
447471

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1-
//===- OnDiskKeyValueDB.h ---------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This declares OnDiskKeyValueDB, a key value storage database of fixed size
11+
/// key and value.
12+
//
13+
//===----------------------------------------------------------------------===//
814

915
#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H
1016
#define LLVM_CAS_ONDISKKEYVALUEDB_H
@@ -35,9 +41,7 @@ class OnDiskKeyValueDB {
3541
Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key);
3642

3743
/// \returns Total size of stored data.
38-
size_t getStorageSize() const {
39-
return Cache.size();
40-
}
44+
size_t getStorageSize() const { return Cache.size(); }
4145

4246
/// \returns The percentage of space utilization of hard space limits.
4347
///
@@ -60,7 +64,10 @@ class OnDiskKeyValueDB {
6064
StringRef ValueName, size_t ValueSize,
6165
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6266

63-
using CheckValueT = function_ref<Error(FileOffset Offset, ArrayRef<char>)>;
67+
using CheckValueT =
68+
function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
69+
/// Validate the storage with a callback \p CheckValue to check the stored
70+
/// value.
6471
Error validate(CheckValueT CheckValue) const;
6572

6673
private:

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,7 @@
146146
coverage bugs, and to 0 otherwise. */
147147
#cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
148148

149+
/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
150+
#cmakedefine01 LLVM_ENABLE_ONDISK_CAS
151+
149152
#endif

llvm/lib/CAS/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
if (LLVM_ENABLE_ONDISK_CAS)
2-
add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1)
3-
endif()
4-
51
add_llvm_component_library(LLVMCAS
62
ActionCache.cpp
73
ActionCaches.cpp

llvm/lib/CAS/OnDiskCAS.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class OnDiskCAS : public BuiltinCAS {
5252
}
5353

5454
ondisk::ObjectHandle convertHandle(ObjectHandle Node) const {
55-
return ondisk::ObjectHandle::fromOpaqueData(Node.getInternalRef(*this));
55+
return ondisk::ObjectHandle(Node.getInternalRef(*this));
5656
}
5757

5858
ObjectRef convertRef(ondisk::ObjectID Ref) const {

0 commit comments

Comments
 (0)