Skip to content

Commit 2810a96

Browse files
OnDisk update/cleanup
Cleanup for code upstreaming
1 parent 0227c6f commit 2810a96

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+432
-340
lines changed

clang/test/CAS/daemon-cas-recovery.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
/// Construct a malformed CAS to recover from.
66
// RUN: echo "abc" | llvm-cas --cas %t/cas --make-blob --data -
7-
// RUN: rm %t/cas/v1.1/v11.data
7+
// RUN: rm %t/cas/v1.1/data.v1
88
// RUN: not llvm-cas --cas %t/cas --validate --check-hash
99

1010
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_FORCE_VALIDATION=1 %clang-cache \

clang/test/CAS/depscan-cas-log.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
1111
// RUN: FileCheck %s --input-file %t/cas/v1.log
1212

13-
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
13+
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
1414
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie
1515

1616
// Even a minimal compilation involves at least 9 records for the cache key.
1717
// CHECK-COUNT-9: [[PID1]] {{[0-9]*}}: create record
1818

19-
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
20-
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v{{[0-9]+}}.index'
19+
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
20+
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}index.v{{[0-9]+}}'

clang/test/CAS/validate-once.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: rm -rf %t
22

33
// RUN: llvm-cas --cas %t/cas --ingest %s
4-
// RUN: mv %t/cas/v1.1/v11.data %t/cas/v1.1/v11.data.bak
4+
// RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak
55

66
// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
77
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \

llvm/include/llvm/CAS/OnDiskDataAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class OnDiskDataAllocator {
6464

6565
/// \returns the buffer that was allocated at \p create time, with size
6666
/// \p UserHeaderSize.
67-
MutableArrayRef<uint8_t> getUserHeader();
67+
MutableArrayRef<uint8_t> getUserHeader() const;
6868

6969
size_t size() const;
7070
size_t capacity() const;

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 52 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
1-
//===- OnDiskGraphDB.h ------------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length
11+
/// hash. This is the class that implements the database storage scheme without
12+
/// exposing the hashing algorithm.
13+
//
14+
//===----------------------------------------------------------------------===//
815

916
#ifndef LLVM_CAS_ONDISKGRAPHDB_H
1017
#define LLVM_CAS_ONDISKGRAPHDB_H
@@ -16,16 +23,13 @@
1623

1724
namespace llvm::cas::ondisk {
1825

19-
/// 8B reference.
26+
/// Standard 8B reference inside OnDiskGraphDB.
2027
class InternalRef {
2128
public:
22-
FileOffset getFileOffset() const { return FileOffset(getRawOffset()); }
23-
29+
FileOffset getFileOffset() const { return FileOffset(Data); }
2430
uint64_t getRawData() const { return Data; }
25-
uint64_t getRawOffset() const { return Data; }
2631

2732
static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); }
28-
2933
static InternalRef getFromOffset(FileOffset Offset) {
3034
return InternalRef(Offset.get());
3135
}
@@ -40,19 +44,17 @@ class InternalRef {
4044
uint64_t Data;
4145
};
4246

43-
/// 4B reference.
47+
/// Compact 4B reference inside OnDiskGraphDB for smaller references.
4448
class InternalRef4B {
4549
public:
4650
FileOffset getFileOffset() const { return FileOffset(Data); }
47-
4851
uint32_t getRawData() const { return Data; }
4952

5053
/// Shrink to 4B reference.
5154
static std::optional<InternalRef4B> tryToShrink(InternalRef Ref) {
52-
uint64_t Offset = Ref.getRawOffset();
55+
uint64_t Offset = Ref.getRawData();
5356
if (Offset > UINT32_MAX)
5457
return std::nullopt;
55-
5658
return InternalRef4B(Offset);
5759
}
5860

@@ -148,10 +150,9 @@ class InternalRefArrayRef {
148150
if (is4B()) {
149151
auto *B = cast<const InternalRef4B *>(Begin);
150152
return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size);
151-
} else {
152-
auto *B = cast<const InternalRef *>(Begin);
153-
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
154153
}
154+
auto *B = cast<const InternalRef *>(Begin);
155+
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
155156
}
156157

157158
InternalRefArrayRef(std::nullopt_t = std::nullopt) {
@@ -172,8 +173,6 @@ class InternalRefArrayRef {
172173
size_t Size = 0;
173174
};
174175

175-
struct OnDiskContent;
176-
177176
/// Reference to a node. The node's data may not be stored in the database.
178177
/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance
179178
/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not
@@ -217,6 +216,7 @@ class ObjectHandle {
217216
uint64_t Opaque;
218217
};
219218

219+
/// Iterator for ObjectID.
220220
class object_refs_iterator
221221
: public iterator_facade_base<object_refs_iterator,
222222
std::random_access_iterator_tag, ObjectID> {
@@ -294,6 +294,7 @@ class OnDiskGraphDB {
294294
/// \returns the data part of the provided object handle.
295295
ArrayRef<char> getObjectData(ObjectHandle Node) const;
296296

297+
/// \returns the objects referenced by the provided object handle.
297298
object_refs_range getObjectRefs(ObjectHandle Node) const {
298299
InternalRefArrayRef Refs = getInternalRefs(Node);
299300
return make_range(Refs.begin(), Refs.end());
@@ -315,6 +316,13 @@ class OnDiskGraphDB {
315316
/// Hashing function type for validation.
316317
using HashingFuncT = function_ref<void(
317318
ArrayRef<ArrayRef<uint8_t>>, ArrayRef<char>, SmallVectorImpl<uint8_t> &)>;
319+
320+
/// Validate the OnDiskGraphDB.
321+
///
322+
/// \param Deep if true, rehash all the objects to ensure no data
323+
/// corruption in stored objects, otherwise just validate the structure of
324+
/// CAS database.
325+
/// \param Hasher is the hashing function used for objects inside CAS.
318326
Error validate(bool Deep, HashingFuncT Hasher) const;
319327

320328
/// How to fault-in nodes if an upstream database is used.
@@ -347,19 +355,20 @@ class OnDiskGraphDB {
347355
~OnDiskGraphDB();
348356

349357
private:
358+
/// Forward declaration for a proxy for an ondisk index record.
350359
struct IndexProxy;
351-
class TempFile;
352-
class MappedTempFile;
353360

354361
enum class ObjectPresence {
355362
Missing,
356363
InPrimaryDB,
357364
OnlyInUpstreamDB,
358365
};
359366

367+
/// Check if object exists and if it is on upstream only.
360368
Expected<ObjectPresence> getObjectPresence(ObjectID Ref,
361369
bool CheckUpstream) const;
362370

371+
/// \returns true if object can be found in database.
363372
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
364373
auto Presence = getObjectPresence(Ref, CheckUpstream);
365374
if (!Presence) {
@@ -379,46 +388,56 @@ class OnDiskGraphDB {
379388
/// When \p load is called for a node that doesn't exist, this function tries
380389
/// to load it from the upstream store and copy it to the primary one.
381390
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);
391+
392+
/// Import the entire tree from upstream with \p UpstreamNode as root.
382393
Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode);
394+
/// Import only the \p UpstreamNode.
383395
Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode);
384396

397+
/// Find the IndexProxy for the hash.
385398
Expected<IndexProxy> indexHash(ArrayRef<uint8_t> Hash);
386399

400+
/// Get path for creating standalone data file.
401+
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
402+
SmallVectorImpl<char> &Path) const;
403+
/// Create a standalone leaf file.
387404
Error createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data);
388405

389-
Expected<MappedTempFile> createTempFile(StringRef FinalPath, uint64_t Size);
390-
391-
OnDiskContent getContentFromHandle(ObjectHandle H) const;
392-
406+
/// @name Helper functions for internal data structures.
407+
/// @{
393408
static InternalRef getInternalRef(ObjectID Ref) {
394409
return InternalRef::getFromRawData(Ref.getOpaqueData());
395410
}
411+
396412
static ObjectID getExternalReference(InternalRef Ref) {
397413
return ObjectID::fromOpaqueData(Ref.getRawData());
398414
}
399415

400416
static ObjectID getExternalReference(const IndexProxy &I);
401417

402-
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
403-
SmallVectorImpl<char> &Path) const;
418+
static InternalRef makeInternalRef(FileOffset IndexOffset);
404419

405420
Expected<ArrayRef<uint8_t>> getDigest(InternalRef Ref) const;
421+
406422
ArrayRef<uint8_t> getDigest(const IndexProxy &I) const;
407423

408424
Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const;
409425

410-
static InternalRef makeInternalRef(FileOffset IndexOffset);
411-
412426
IndexProxy
413427
getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const;
414428

415429
InternalRefArrayRef getInternalRefs(ObjectHandle Node) const;
430+
/// @}
416431

417-
void recordStandaloneSizeIncrease(size_t SizeIncrease);
432+
/// Get the atomic variable that keeps track of the standalone data storage size.
433+
std::atomic<uint64_t> &standaloneStorageSize() const;
418434

419-
std::atomic<uint64_t> &getStandaloneStorageSize();
435+
/// Increase the standalone data size.
436+
void recordStandaloneSizeIncrease(size_t SizeIncrease);
437+
/// Get the standalone data size.
420438
uint64_t getStandaloneStorageSize() const;
421439

440+
// Private constructor.
422441
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
423442
OnDiskDataAllocator DataPool,
424443
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
@@ -434,14 +453,19 @@ class OnDiskGraphDB {
434453
/// Data type is DataRecordHandle.
435454
OnDiskDataAllocator DataPool;
436455

437-
void *StandaloneData; // a StandaloneDataMap.
456+
/// A StandaloneDataMap.
457+
void *StandaloneData;
438458

459+
/// Path to the root directory.
439460
std::string RootPath;
440461

441462
/// Optional on-disk store to be used for faulting-in nodes.
442463
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
464+
465+
/// The policy used to fault in data from upstream.
443466
FaultInPolicy FIPolicy;
444467

468+
/// Debug Logger.
445469
std::shared_ptr<OnDiskCASLogger> Logger;
446470
};
447471

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1-
//===- OnDiskKeyValueDB.h ---------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This declares OnDiskKeyValueDB, a key value storage database of fixed size
11+
/// key and value.
12+
//
13+
//===----------------------------------------------------------------------===//
814

915
#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H
1016
#define LLVM_CAS_ONDISKKEYVALUEDB_H
@@ -35,9 +41,7 @@ class OnDiskKeyValueDB {
3541
Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key);
3642

3743
/// \returns Total size of stored data.
38-
size_t getStorageSize() const {
39-
return Cache.size();
40-
}
44+
size_t getStorageSize() const { return Cache.size(); }
4145

4246
/// \returns The percentage of space utilization of hard space limits.
4347
///
@@ -60,7 +64,10 @@ class OnDiskKeyValueDB {
6064
StringRef ValueName, size_t ValueSize,
6165
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6266

63-
using CheckValueT = function_ref<Error(FileOffset Offset, ArrayRef<char>)>;
67+
using CheckValueT =
68+
function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
69+
/// Validate the storage with a callback \p CheckValue to check the stored
70+
/// value.
6471
Error validate(CheckValueT CheckValue) const;
6572

6673
private:

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,7 @@
146146
coverage bugs, and to 0 otherwise. */
147147
#cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
148148

149+
/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
150+
#cmakedefine01 LLVM_ENABLE_ONDISK_CAS
151+
149152
#endif

llvm/lib/CAS/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
if (LLVM_ENABLE_ONDISK_CAS)
2-
add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1)
3-
endif()
4-
51
add_llvm_component_library(LLVMCAS
62
ActionCache.cpp
73
ActionCaches.cpp

0 commit comments

Comments
 (0)