1- // ===- OnDiskGraphDB.h  ------------------------------------------*- C++ -* -===//
1+ // ===--------------------------------------------------------------------- -===//
22// 
33//  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44//  See https://llvm.org/LICENSE.txt for license information.
55//  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66// 
77// ===----------------------------------------------------------------------===//
8+ // 
9+ // / \file
10+ // / This declares OnDiskGraphDB, an on-disk CAS database with a fixed-length
11+ // / hash. This is the class that implements the database storage scheme without
12+ // / exposing the hashing algorithm.
13+ // 
14+ // ===----------------------------------------------------------------------===//
815
916#ifndef  LLVM_CAS_ONDISKGRAPHDB_H
1017#define  LLVM_CAS_ONDISKGRAPHDB_H 
1623
1724namespace  llvm ::cas::ondisk {
1825
19- // / 8B  reference.
26+ // / Standard 8-byte reference inside OnDiskGraphDB.
2027class  InternalRef  {
2128public: 
22-   FileOffset getFileOffset () const  { return  FileOffset (getRawOffset ()); }
23- 
29+   FileOffset getFileOffset () const  { return  FileOffset (Data); }
2430  uint64_t  getRawData () const  { return  Data; }
25-   uint64_t  getRawOffset () const  { return  Data; }
2631
2732  static  InternalRef getFromRawData (uint64_t  Data) { return  InternalRef (Data); }
28- 
2933  static  InternalRef getFromOffset (FileOffset Offset) {
3034    return  InternalRef (Offset.get ());
3135  }
@@ -40,19 +44,17 @@ class InternalRef {
4044  uint64_t  Data;
4145};
4246
43- // / 4B  reference.
47+ // / Compact 4-byte reference inside OnDiskGraphDB for smaller references.
4448class  InternalRef4B  {
4549public: 
4650  FileOffset getFileOffset () const  { return  FileOffset (Data); }
47- 
4851  uint32_t  getRawData () const  { return  Data; }
4952
5053  // / Shrink to 4B reference.
5154  static  std::optional<InternalRef4B> tryToShrink (InternalRef Ref) {
52-     uint64_t  Offset = Ref.getRawOffset ();
55+     uint64_t  Offset = Ref.getRawData ();
5356    if  (Offset > UINT32_MAX)
5457      return  std::nullopt ;
55- 
5658    return  InternalRef4B (Offset);
5759  }
5860
@@ -148,10 +150,9 @@ class InternalRefArrayRef {
148150    if  (is4B ()) {
149151      auto  *B = cast<const  InternalRef4B *>(Begin);
150152      return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef4B) * Size);
151-     } else  {
152-       auto  *B = cast<const  InternalRef *>(Begin);
153-       return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef) * Size);
154153    }
154+     auto  *B = cast<const  InternalRef *>(Begin);
155+     return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef) * Size);
155156  }
156157
157158  InternalRefArrayRef (std::nullopt_t  = std::nullopt ) {
@@ -172,8 +173,6 @@ class InternalRefArrayRef {
172173  size_t  Size = 0 ;
173174};
174175
175- struct  OnDiskContent ;
176- 
177176// / Reference to a node. The node's data may not be stored in the database.
178177// / An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance
179178// / it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not
@@ -199,11 +198,11 @@ class ObjectID {
199198// / Handle for a loaded node object.
200199class  ObjectHandle  {
201200public: 
201+   explicit  ObjectHandle (uint64_t  Opaque) : Opaque(Opaque) {}
202202  uint64_t  getOpaqueData () const  { return  Opaque; }
203203
204-   static  ObjectHandle fromOpaqueData (uint64_t  Opaque) {
205-     return  ObjectHandle (Opaque);
206-   }
204+   static  ObjectHandle fromFileOffset (FileOffset Offset);
205+   static  ObjectHandle fromMemory (uintptr_t  Ptr);
207206
208207  friend  bool  operator ==(const  ObjectHandle &LHS, const  ObjectHandle &RHS) {
209208    return  LHS.Opaque  == RHS.Opaque ;
@@ -213,10 +212,10 @@ class ObjectHandle {
213212  }
214213
215214private: 
216-   explicit  ObjectHandle (uint64_t  Opaque) : Opaque(Opaque) {}
217215  uint64_t  Opaque;
218216};
219217
218+ // / Iterator for ObjectID.
220219class  object_refs_iterator 
221220    : public iterator_facade_base<object_refs_iterator,
222221                                  std::random_access_iterator_tag, ObjectID> {
@@ -294,6 +293,7 @@ class OnDiskGraphDB {
294293  // / \returns the data part of the provided object handle.
295294  ArrayRef<char > getObjectData (ObjectHandle Node) const ;
296295
296+   // / \returns the objects referenced by the provided object handle.
297297  object_refs_range getObjectRefs (ObjectHandle Node) const  {
298298    InternalRefArrayRef Refs = getInternalRefs (Node);
299299    return  make_range (Refs.begin (), Refs.end ());
@@ -315,6 +315,13 @@ class OnDiskGraphDB {
315315  // / Hashing function type for validation.
316316  using  HashingFuncT = function_ref<void (
317317      ArrayRef<ArrayRef<uint8_t >>, ArrayRef<char >, SmallVectorImpl<uint8_t > &)>;
318+ 
319+   // / Validate the OnDiskGraphDB.
320+   // /
321+   // / \param Deep if true, rehash all the objects to ensure no data
322+   // / corruption in stored objects, otherwise just validate the structure of
323+   // / the CAS database.
324+   // / \param Hasher is the hashing function used for objects inside CAS.
318325  Error validate (bool  Deep, HashingFuncT Hasher) const ;
319326
320327  // / How to fault-in nodes if an upstream database is used.
@@ -347,19 +354,20 @@ class OnDiskGraphDB {
347354  ~OnDiskGraphDB ();
348355
349356private: 
357+   // / Forward declaration for a proxy for an on-disk index record.
350358  struct  IndexProxy ;
351-   class  TempFile ;
352-   class  MappedTempFile ;
353359
354360  enum  class  ObjectPresence  {
355361    Missing,
356362    InPrimaryDB,
357363    OnlyInUpstreamDB,
358364  };
359365
366+   // / Check if the object exists and whether it is only in the upstream DB.
360367  Expected<ObjectPresence> getObjectPresence (ObjectID Ref,
361368                                             bool  CheckUpstream) const ;
362369
370+   // / \returns true if the object can be found in the database.
363371  bool  containsObject (ObjectID Ref, bool  CheckUpstream) const  {
364372    auto  Presence = getObjectPresence (Ref, CheckUpstream);
365373    if  (!Presence) {
@@ -379,46 +387,57 @@ class OnDiskGraphDB {
379387  // / When \p load is called for a node that doesn't exist, this function tries
380388  // / to load it from the upstream store and copy it to the primary one.
381389  Expected<std::optional<ObjectHandle>> faultInFromUpstream (ObjectID PrimaryID);
390+ 
391+   // / Import the entire tree from upstream with \p UpstreamNode as root.
382392  Error importFullTree (ObjectID PrimaryID, ObjectHandle UpstreamNode);
393+   // / Import only the \p UpstreamNode.
383394  Error importSingleNode (ObjectID PrimaryID, ObjectHandle UpstreamNode);
384395
396+   // / Find the IndexProxy for the hash.
385397  Expected<IndexProxy> indexHash (ArrayRef<uint8_t > Hash);
386398
399+   // / Get the path for creating a standalone data file.
400+   void  getStandalonePath (StringRef FileSuffix, const  IndexProxy &I,
401+                          SmallVectorImpl<char > &Path) const ;
402+   // / Create a standalone leaf file.
387403  Error createStandaloneLeaf (IndexProxy &I, ArrayRef<char > Data);
388404
389-   Expected<MappedTempFile> createTempFile (StringRef FinalPath, uint64_t  Size);
390- 
391-   OnDiskContent getContentFromHandle (ObjectHandle H) const ;
392- 
405+   // / \name Helper functions for internal data structures.
406+   // / \{
393407  static  InternalRef getInternalRef (ObjectID Ref) {
394408    return  InternalRef::getFromRawData (Ref.getOpaqueData ());
395409  }
410+ 
396411  static  ObjectID getExternalReference (InternalRef Ref) {
397412    return  ObjectID::fromOpaqueData (Ref.getRawData ());
398413  }
399414
400415  static  ObjectID getExternalReference (const  IndexProxy &I);
401416
402-   void  getStandalonePath (StringRef FileSuffix, const  IndexProxy &I,
403-                          SmallVectorImpl<char > &Path) const ;
417+   static  InternalRef makeInternalRef (FileOffset IndexOffset);
404418
405419  Expected<ArrayRef<uint8_t >> getDigest (InternalRef Ref) const ;
420+ 
406421  ArrayRef<uint8_t > getDigest (const  IndexProxy &I) const ;
407422
408423  Expected<IndexProxy> getIndexProxyFromRef (InternalRef Ref) const ;
409424
410-   static  InternalRef makeInternalRef (FileOffset IndexOffset);
411- 
412425  IndexProxy
413426  getIndexProxyFromPointer (OnDiskTrieRawHashMap::ConstOnDiskPtr P) const ;
414427
415428  InternalRefArrayRef getInternalRefs (ObjectHandle Node) const ;
429+   // / \}
416430
417-   void  recordStandaloneSizeIncrease (size_t  SizeIncrease);
431+   // / Get the atomic variable that keeps track of the standalone data storage
432+   // / size.
433+   std::atomic<uint64_t > &standaloneStorageSize () const ;
418434
419-   std::atomic<uint64_t > &getStandaloneStorageSize ();
435+   // / Increase the standalone data size.
436+   void  recordStandaloneSizeIncrease (size_t  SizeIncrease);
437+   // / Get the standalone data size.
420438  uint64_t  getStandaloneStorageSize () const ;
421439
440+   //  Private constructor.
422441  OnDiskGraphDB (StringRef RootPath, OnDiskTrieRawHashMap Index,
423442                OnDiskDataAllocator DataPool,
424443                std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
@@ -434,14 +453,19 @@ class OnDiskGraphDB {
434453  // / Data type is DataRecordHandle.
435454  OnDiskDataAllocator DataPool;
436455
437-   void  *StandaloneData; //  a StandaloneDataMap.
456+   // / A StandaloneDataMap.
457+   void  *StandaloneData = nullptr ;
438458
459+   // / Path to the root directory.
439460  std::string RootPath;
440461
441462  // / Optional on-disk store to be used for faulting-in nodes.
442463  std::unique_ptr<OnDiskGraphDB> UpstreamDB;
464+ 
465+   // / The policy used to fault in data from upstream.
443466  FaultInPolicy FIPolicy;
444467
468+   // / Debug Logger.
445469  std::shared_ptr<OnDiskCASLogger> Logger;
446470};
447471
0 commit comments