Skip to content

Commit 7950626

Browse files
OnDisk update/cleanup
1 parent 0227c6f commit 7950626

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+394
-297
lines changed

clang/test/CAS/daemon-cas-recovery.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
/// Construct a malformed CAS to recover from.
66
// RUN: echo "abc" | llvm-cas --cas %t/cas --make-blob --data -
7-
// RUN: rm %t/cas/v1.1/v11.data
7+
// RUN: rm %t/cas/v1.1/data.v1
88
// RUN: not llvm-cas --cas %t/cas --validate --check-hash
99

1010
// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_FORCE_VALIDATION=1 %clang-cache \

clang/test/CAS/depscan-cas-log.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@
1010
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
1111
// RUN: FileCheck %s --input-file %t/cas/v1.log
1212

13-
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
13+
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
1414
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie
1515

1616
// Even a minimal compilation involves at least 9 records for the cache key.
1717
// CHECK-COUNT-9: [[PID1]] {{[0-9]*}}: create record
1818

19-
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
20-
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v{{[0-9]+}}.index'
19+
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
20+
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}index.v{{[0-9]+}}'

clang/test/CAS/validate-once.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
// RUN: rm -rf %t
22

33
// RUN: llvm-cas --cas %t/cas --ingest %s
4-
// RUN: mv %t/cas/v1.1/v11.data %t/cas/v1.1/v11.data.bak
4+
// RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak
55

66
// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
77
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \

llvm/include/llvm/CAS/OnDiskDataAllocator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class OnDiskDataAllocator {
6464

6565
/// \returns the buffer that was allocated at \p create time, with size
6666
/// \p UserHeaderSize.
67-
MutableArrayRef<uint8_t> getUserHeader();
67+
MutableArrayRef<uint8_t> getUserHeader() const;
6868

6969
size_t size() const;
7070
size_t capacity() const;

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 56 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,17 @@
1-
//===- OnDiskGraphDB.h ------------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This declares OnDiskGraphDB, an on-disk CAS database with a fixed-length
11+
/// hash. This is the class that implements the database storage scheme without
12+
/// exposing the hashing algorithm.
13+
//
14+
//===----------------------------------------------------------------------===//
815

916
#ifndef LLVM_CAS_ONDISKGRAPHDB_H
1017
#define LLVM_CAS_ONDISKGRAPHDB_H
@@ -16,16 +23,13 @@
1623

1724
namespace llvm::cas::ondisk {
1825

19-
/// 8B reference.
26+
/// Standard 8B reference inside OnDiskGraphDB.
2027
class InternalRef {
2128
public:
22-
FileOffset getFileOffset() const { return FileOffset(getRawOffset()); }
23-
29+
FileOffset getFileOffset() const { return FileOffset(Data); }
2430
uint64_t getRawData() const { return Data; }
25-
uint64_t getRawOffset() const { return Data; }
2631

2732
static InternalRef getFromRawData(uint64_t Data) { return InternalRef(Data); }
28-
2933
static InternalRef getFromOffset(FileOffset Offset) {
3034
return InternalRef(Offset.get());
3135
}
@@ -40,19 +44,17 @@ class InternalRef {
4044
uint64_t Data;
4145
};
4246

43-
/// 4B reference.
47+
/// Compact 4B reference inside OnDiskGraphDB for smaller references.
4448
class InternalRef4B {
4549
public:
4650
FileOffset getFileOffset() const { return FileOffset(Data); }
47-
4851
uint32_t getRawData() const { return Data; }
4952

5053
/// Shrink to 4B reference.
5154
static std::optional<InternalRef4B> tryToShrink(InternalRef Ref) {
52-
uint64_t Offset = Ref.getRawOffset();
55+
uint64_t Offset = Ref.getRawData();
5356
if (Offset > UINT32_MAX)
5457
return std::nullopt;
55-
5658
return InternalRef4B(Offset);
5759
}
5860

@@ -148,10 +150,9 @@ class InternalRefArrayRef {
148150
if (is4B()) {
149151
auto *B = cast<const InternalRef4B *>(Begin);
150152
return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size);
151-
} else {
152-
auto *B = cast<const InternalRef *>(Begin);
153-
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
154153
}
154+
auto *B = cast<const InternalRef *>(Begin);
155+
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
155156
}
156157

157158
InternalRefArrayRef(std::nullopt_t = std::nullopt) {
@@ -172,6 +173,7 @@ class InternalRefArrayRef {
172173
size_t Size = 0;
173174
};
174175

176+
/// Proxy for any on-disk object or raw data.
175177
struct OnDiskContent;
176178

177179
/// Reference to a node. The node's data may not be stored in the database.
@@ -217,6 +219,7 @@ class ObjectHandle {
217219
uint64_t Opaque;
218220
};
219221

222+
/// Iterator for ObjectID.
220223
class object_refs_iterator
221224
: public iterator_facade_base<object_refs_iterator,
222225
std::random_access_iterator_tag, ObjectID> {
@@ -294,6 +297,7 @@ class OnDiskGraphDB {
294297
/// \returns the data part of the provided object handle.
295298
ArrayRef<char> getObjectData(ObjectHandle Node) const;
296299

300+
/// \returns the objects referenced by the provided object handle.
297301
object_refs_range getObjectRefs(ObjectHandle Node) const {
298302
InternalRefArrayRef Refs = getInternalRefs(Node);
299303
return make_range(Refs.begin(), Refs.end());
@@ -315,6 +319,13 @@ class OnDiskGraphDB {
315319
/// Hashing function type for validation.
316320
using HashingFuncT = function_ref<void(
317321
ArrayRef<ArrayRef<uint8_t>>, ArrayRef<char>, SmallVectorImpl<uint8_t> &)>;
322+
323+
/// Validate the OnDiskGraphDB.
324+
///
325+
/// \param Deep if true, rehash all the objects to make sure there is no data
326+
/// corruption in the stored objects; otherwise just validate the structure of
327+
/// the CAS database.
328+
/// \param Hasher is the hashing function used for objects inside CAS.
318329
Error validate(bool Deep, HashingFuncT Hasher) const;
319330

320331
/// How to fault-in nodes if an upstream database is used.
@@ -357,9 +368,11 @@ class OnDiskGraphDB {
357368
OnlyInUpstreamDB,
358369
};
359370

371+
/// Check whether the object exists and whether it is only in the upstream DB.
360372
Expected<ObjectPresence> getObjectPresence(ObjectID Ref,
361373
bool CheckUpstream) const;
362374

375+
/// \returns true if the object can be found in the database.
363376
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
364377
auto Presence = getObjectPresence(Ref, CheckUpstream);
365378
if (!Presence) {
@@ -379,46 +392,60 @@ class OnDiskGraphDB {
379392
/// When \p load is called for a node that doesn't exist, this function tries
380393
/// to load it from the upstream store and copy it to the primary one.
381394
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);
395+
396+
/// Import the entire tree from upstream with \p UpstreamNode as root.
382397
Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode);
398+
/// Import only the \p UpstreamNode.
383399
Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode);
384400

401+
/// Find the IndexProxy for the hash.
385402
Expected<IndexProxy> indexHash(ArrayRef<uint8_t> Hash);
386403

404+
/// Get path for creating standalone data file.
405+
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
406+
SmallVectorImpl<char> &Path) const;
407+
/// Create a standalone leaf file.
387408
Error createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data);
388-
409+
/// Create temporary file for standalone file storage.
389410
Expected<MappedTempFile> createTempFile(StringRef FinalPath, uint64_t Size);
390411

391-
OnDiskContent getContentFromHandle(ObjectHandle H) const;
392-
412+
/// @name Helper functions for internal data structures.
413+
/// @{
393414
static InternalRef getInternalRef(ObjectID Ref) {
394415
return InternalRef::getFromRawData(Ref.getOpaqueData());
395416
}
417+
396418
static ObjectID getExternalReference(InternalRef Ref) {
397419
return ObjectID::fromOpaqueData(Ref.getRawData());
398420
}
399421

400422
static ObjectID getExternalReference(const IndexProxy &I);
401423

402-
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
403-
SmallVectorImpl<char> &Path) const;
424+
static InternalRef makeInternalRef(FileOffset IndexOffset);
404425

405426
Expected<ArrayRef<uint8_t>> getDigest(InternalRef Ref) const;
427+
406428
ArrayRef<uint8_t> getDigest(const IndexProxy &I) const;
407429

408-
Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const;
430+
OnDiskContent getContentFromHandle(ObjectHandle H) const;
409431

410-
static InternalRef makeInternalRef(FileOffset IndexOffset);
432+
Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const;
411433

412434
IndexProxy
413435
getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const;
414436

415437
InternalRefArrayRef getInternalRefs(ObjectHandle Node) const;
438+
/// @}
416439

417-
void recordStandaloneSizeIncrease(size_t SizeIncrease);
440+
/// Get the atomic variable that keeps track of the standalone data storage size.
441+
std::atomic<uint64_t> &standaloneStorageSize() const;
418442

419-
std::atomic<uint64_t> &getStandaloneStorageSize();
443+
/// Increase the standalone data size.
444+
void recordStandaloneSizeIncrease(size_t SizeIncrease);
445+
/// Get the standalone data size.
420446
uint64_t getStandaloneStorageSize() const;
421447

448+
// Private constructor.
422449
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
423450
OnDiskDataAllocator DataPool,
424451
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
@@ -434,14 +461,19 @@ class OnDiskGraphDB {
434461
/// Data type is DataRecordHandle.
435462
OnDiskDataAllocator DataPool;
436463

437-
void *StandaloneData; // a StandaloneDataMap.
464+
// a StandaloneDataMap.
465+
void *StandaloneData;
438466

467+
// Path to the root directory.
439468
std::string RootPath;
440469

441-
/// Optional on-disk store to be used for faulting-in nodes.
470+
// Optional on-disk store to be used for faulting-in nodes.
442471
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
472+
473+
// The policy used to fault in data from upstream.
443474
FaultInPolicy FIPolicy;
444475

476+
// Debug Logger.
445477
std::shared_ptr<OnDiskCASLogger> Logger;
446478
};
447479

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,16 @@
1-
//===- OnDiskKeyValueDB.h ---------------------------------------*- C++ -*-===//
1+
//===----------------------------------------------------------------------===//
22
//
33
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44
// See https://llvm.org/LICENSE.txt for license information.
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
//
9+
/// \file
10+
/// This declares OnDiskKeyValueDB, a key value storage database of fixed size
11+
/// key and value.
12+
//
13+
//===----------------------------------------------------------------------===//
814

915
#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H
1016
#define LLVM_CAS_ONDISKKEYVALUEDB_H
@@ -35,9 +41,7 @@ class OnDiskKeyValueDB {
3541
Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key);
3642

3743
/// \returns Total size of stored data.
38-
size_t getStorageSize() const {
39-
return Cache.size();
40-
}
44+
size_t getStorageSize() const { return Cache.size(); }
4145

4246
/// \returns The percentage of space utilization of hard space limits.
4347
///
@@ -60,7 +64,10 @@ class OnDiskKeyValueDB {
6064
StringRef ValueName, size_t ValueSize,
6165
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);
6266

63-
using CheckValueT = function_ref<Error(FileOffset Offset, ArrayRef<char>)>;
67+
/// Validate the storage with a callback \p CheckValue to check the stored
68+
/// value.
69+
using CheckValueT =
70+
function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
6471
Error validate(CheckValueT CheckValue) const;
6572

6673
private:

llvm/include/llvm/Config/llvm-config.h.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,4 +146,7 @@
146146
coverage bugs, and to 0 otherwise. */
147147
#cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN
148148

149+
/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
150+
#cmakedefine01 LLVM_ENABLE_ONDISK_CAS
151+
149152
#endif

llvm/lib/CAS/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,3 @@
1-
if (LLVM_ENABLE_ONDISK_CAS)
2-
add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1)
3-
endif()
4-
51
add_llvm_component_library(LLVMCAS
62
ActionCache.cpp
73
ActionCaches.cpp

llvm/lib/CAS/OnDiskCommon.cpp

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,10 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "OnDiskCommon.h"
10-
#include "llvm/ADT/StringRef.h"
11-
#include "llvm/Config/config.h"
1210
#include "llvm/Support/Error.h"
11+
#include "llvm/Support/FileSystem.h"
1312
#include "llvm/Support/Process.h"
14-
#include <limits>
1513
#include <mutex>
16-
#include <optional>
1714
#include <thread>
1815

1916
#if __has_include(<sys/file.h>)
@@ -29,6 +26,10 @@
2926
#include <fcntl.h>
3027
#endif
3128

29+
#if __has_include(<sys/mount.h>)
30+
#include <sys/mount.h> // statfs
31+
#endif
32+
3233
using namespace llvm;
3334

3435
static uint64_t OnDiskCASMaxMappingSize = 0;
@@ -117,21 +118,22 @@ cas::ondisk::tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout,
117118
#endif
118119
}
119120

120-
Expected<size_t> cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize) {
121+
Expected<size_t> cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize,
122+
size_t NewSize) {
121123
auto CreateError = [&](std::error_code EC) -> Expected<size_t> {
122124
if (EC == std::errc::not_supported)
123125
// Ignore ENOTSUP in case the filesystem cannot preallocate.
124126
return NewSize;
125127
#if defined(HAVE_POSIX_FALLOCATE)
126-
if (EC == std::errc::invalid_argument &&
127-
CurrentSize < NewSize && // len > 0
128+
if (EC == std::errc::invalid_argument && CurrentSize < NewSize && // len > 0
128129
NewSize < std::numeric_limits<off_t>::max()) // 0 <= offset, len < max
129130
// Prior to 2024, POSIX required EINVAL for cases that should be ENOTSUP,
130131
// so handle it the same as above if it is not one of the other ways to
131132
// get EINVAL.
132133
return NewSize;
133134
#endif
134-
return createStringError(EC, "failed to allocate to CAS file: " + EC.message());
135+
return createStringError(EC,
136+
"failed to allocate to CAS file: " + EC.message());
135137
};
136138
#if defined(HAVE_POSIX_FALLOCATE)
137139
// Note: posix_fallocate returns its error directly, not via errno.
@@ -156,6 +158,24 @@ Expected<size_t> cas::ondisk::preallocateFileTail(int FD, size_t CurrentSize, si
156158
assert(CurrentSize + FAlloc.fst_bytesalloc >= NewSize);
157159
return CurrentSize + FAlloc.fst_bytesalloc;
158160
#else
159-
return NewSize; // Pretend it worked.
161+
(void)CreateError; // Silence unused variable.
162+
return NewSize; // Pretend it worked.
163+
#endif
164+
}
165+
166+
bool cas::ondisk::useSmallMappingSize(const Twine &P) {
167+
// Add exceptions to use small database file here.
168+
#if defined(__APPLE__) && __has_include(<sys/mount.h>)
169+
// macOS tmpfs does not support sparse tails.
170+
SmallString<128> PathStorage;
171+
StringRef Path = P.toNullTerminatedStringRef(PathStorage);
172+
struct statfs StatFS;
173+
if (statfs(Path.data(), &StatFS) != 0)
174+
return false;
175+
176+
if (strcmp(StatFS.f_fstypename, "tmpfs") == 0)
177+
return true;
160178
#endif
179+
// Default to using a regular database file.
180+
return false;
161181
}

0 commit comments

Comments
 (0)