1- // ===- OnDiskGraphDB.h  ------------------------------------------*- C++ -* -===//
1+ // ===--------------------------------------------------------------------- -===//
22// 
33//  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44//  See https://llvm.org/LICENSE.txt for license information.
55//  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66// 
77// ===----------------------------------------------------------------------===//
8+ // 
9+ // / \file
10+ // / This declares OnDiskGraphDB, an on-disk CAS database with a fixed-length
11+ // / hash. This is the class that implements the database storage scheme without
12+ // / exposing the hashing algorithm.
13+ // 
14+ // ===----------------------------------------------------------------------===//
815
916#ifndef  LLVM_CAS_ONDISKGRAPHDB_H
1017#define  LLVM_CAS_ONDISKGRAPHDB_H 
1623
1724namespace  llvm ::cas::ondisk {
1825
19- // / 8B reference.
26+ // / Standard 8B reference inside OnDiskGraphDB.
2027class  InternalRef  {
2128public: 
22-   FileOffset getFileOffset () const  { return  FileOffset (getRawOffset ()); }
23- 
29+   FileOffset getFileOffset () const  { return  FileOffset (Data); }
2430  uint64_t  getRawData () const  { return  Data; }
25-   uint64_t  getRawOffset () const  { return  Data; }
2631
2732  static  InternalRef getFromRawData (uint64_t  Data) { return  InternalRef (Data); }
28- 
2933  static  InternalRef getFromOffset (FileOffset Offset) {
3034    return  InternalRef (Offset.get ());
3135  }
@@ -40,19 +44,17 @@ class InternalRef {
4044  uint64_t  Data;
4145};
4246
43- // / 4B reference.
47+ // / Compact 4B reference inside OnDiskGraphDB for offsets that fit in 32 bits.
4448class  InternalRef4B  {
4549public: 
4650  FileOffset getFileOffset () const  { return  FileOffset (Data); }
47- 
4851  uint32_t  getRawData () const  { return  Data; }
4952
5053  // / Shrink to 4B reference.
5154  static  std::optional<InternalRef4B> tryToShrink (InternalRef Ref) {
52-     uint64_t  Offset = Ref.getRawOffset ();
55+     uint64_t  Offset = Ref.getRawData ();
5356    if  (Offset > UINT32_MAX)
5457      return  std::nullopt ;
55- 
5658    return  InternalRef4B (Offset);
5759  }
5860
@@ -148,10 +150,9 @@ class InternalRefArrayRef {
148150    if  (is4B ()) {
149151      auto  *B = cast<const  InternalRef4B *>(Begin);
150152      return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef4B) * Size);
151-     } else  {
152-       auto  *B = cast<const  InternalRef *>(Begin);
153-       return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef) * Size);
154153    }
154+     auto  *B = cast<const  InternalRef *>(Begin);
155+     return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef) * Size);
155156  }
156157
157158  InternalRefArrayRef (std::nullopt_t  = std::nullopt ) {
@@ -172,6 +173,7 @@ class InternalRefArrayRef {
172173  size_t  Size = 0 ;
173174};
174175
176+ // / Proxy for any on-disk object or raw data.
175177struct  OnDiskContent ;
176178
177179// / Reference to a node. The node's data may not be stored in the database.
@@ -217,6 +219,7 @@ class ObjectHandle {
217219  uint64_t  Opaque;
218220};
219221
222+ // / Iterator for ObjectID.
220223class  object_refs_iterator 
221224    : public iterator_facade_base<object_refs_iterator,
222225                                  std::random_access_iterator_tag, ObjectID> {
@@ -294,6 +297,7 @@ class OnDiskGraphDB {
294297  // / \returns the data part of the provided object handle.
295298  ArrayRef<char > getObjectData (ObjectHandle Node) const ;
296299
300+   // / \returns the range of objects referenced by the provided object handle.
297301  object_refs_range getObjectRefs (ObjectHandle Node) const  {
298302    InternalRefArrayRef Refs = getInternalRefs (Node);
299303    return  make_range (Refs.begin (), Refs.end ());
@@ -315,6 +319,13 @@ class OnDiskGraphDB {
315319  // / Hashing function type for validation.
316320  using  HashingFuncT = function_ref<void (
317321      ArrayRef<ArrayRef<uint8_t >>, ArrayRef<char >, SmallVectorImpl<uint8_t > &)>;
322+ 
323+   // / Validate the OnDiskGraphDB.
324+   // /
325+   // / \param Deep if true, rehash all the objects to make sure there is no
326+   // / data corruption in the stored objects; otherwise just validate the
327+   // / structure of the CAS database.
328+   // / \param Hasher is the hashing function used for objects inside CAS.
318329  Error validate (bool  Deep, HashingFuncT Hasher) const ;
319330
320331  // / How to fault-in nodes if an upstream database is used.
@@ -357,9 +368,11 @@ class OnDiskGraphDB {
357368    OnlyInUpstreamDB,
358369  };
359370
371+   //  Check whether the object exists and whether it is only in the upstream DB.
360372  Expected<ObjectPresence> getObjectPresence (ObjectID Ref,
361373                                             bool  CheckUpstream) const ;
362374
375+   //  \returns true if the object can be found in the database.
363376  bool  containsObject (ObjectID Ref, bool  CheckUpstream) const  {
364377    auto  Presence = getObjectPresence (Ref, CheckUpstream);
365378    if  (!Presence) {
@@ -379,46 +392,60 @@ class OnDiskGraphDB {
379392  // / When \p load is called for a node that doesn't exist, this function tries
380393  // / to load it from the upstream store and copy it to the primary one.
381394  Expected<std::optional<ObjectHandle>> faultInFromUpstream (ObjectID PrimaryID);
395+ 
396+   // / Import the entire tree from upstream with \p UpstreamNode as root.
382397  Error importFullTree (ObjectID PrimaryID, ObjectHandle UpstreamNode);
398+   // / Import only the \p UpstreamNode.
383399  Error importSingleNode (ObjectID PrimaryID, ObjectHandle UpstreamNode);
384400
401+   // / Find the IndexProxy for the hash.
385402  Expected<IndexProxy> indexHash (ArrayRef<uint8_t > Hash);
386403
404+   // / Get path for creating standalone data file.
405+   void  getStandalonePath (StringRef FileSuffix, const  IndexProxy &I,
406+                          SmallVectorImpl<char > &Path) const ;
407+   // / Create a standalone leaf file.
387408  Error createStandaloneLeaf (IndexProxy &I, ArrayRef<char > Data);
388- 
409+   // / Create a temporary file for standalone file storage.
389410  Expected<MappedTempFile> createTempFile (StringRef FinalPath, uint64_t  Size);
390411
391-   OnDiskContent  getContentFromHandle (ObjectHandle H)  const ; 
392- 
412+   // / @name Helper functions for internal data structures. 
413+    // / @{ 
393414  static  InternalRef getInternalRef (ObjectID Ref) {
394415    return  InternalRef::getFromRawData (Ref.getOpaqueData ());
395416  }
417+ 
396418  static  ObjectID getExternalReference (InternalRef Ref) {
397419    return  ObjectID::fromOpaqueData (Ref.getRawData ());
398420  }
399421
400422  static  ObjectID getExternalReference (const  IndexProxy &I);
401423
402-   void  getStandalonePath (StringRef FileSuffix, const  IndexProxy &I,
403-                          SmallVectorImpl<char > &Path) const ;
424+   static  InternalRef makeInternalRef (FileOffset IndexOffset);
404425
405426  Expected<ArrayRef<uint8_t >> getDigest (InternalRef Ref) const ;
427+ 
406428  ArrayRef<uint8_t > getDigest (const  IndexProxy &I) const ;
407429
408-   Expected<IndexProxy>  getIndexProxyFromRef (InternalRef Ref ) const ;
430+   OnDiskContent  getContentFromHandle (ObjectHandle H ) const ;
409431
410-   static   InternalRef makeInternalRef (FileOffset IndexOffset) ;
432+   Expected<IndexProxy>  getIndexProxyFromRef ( InternalRef Ref)  const ;
411433
412434  IndexProxy
413435  getIndexProxyFromPointer (OnDiskTrieRawHashMap::ConstOnDiskPtr P) const ;
414436
415437  InternalRefArrayRef getInternalRefs (ObjectHandle Node) const ;
438+   // / @}
416439
417-   void  recordStandaloneSizeIncrease (size_t  SizeIncrease);
440+   // / Get the atomic variable that keeps track of the standalone data storage size.
441+   std::atomic<uint64_t > &standaloneStorageSize () const ;
418442
419-   std::atomic<uint64_t > &getStandaloneStorageSize ();
443+   // / Increase the standalone data size.
444+   void  recordStandaloneSizeIncrease (size_t  SizeIncrease);
445+   // / Get the standalone data size.
420446  uint64_t  getStandaloneStorageSize () const ;
421447
448+   //  Private constructor.
422449  OnDiskGraphDB (StringRef RootPath, OnDiskTrieRawHashMap Index,
423450                OnDiskDataAllocator DataPool,
424451                std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
@@ -434,14 +461,19 @@ class OnDiskGraphDB {
434461  // / Data type is DataRecordHandle.
435462  OnDiskDataAllocator DataPool;
436463
437-   void  *StandaloneData; //  a StandaloneDataMap.
464+   //  Opaque pointer to a StandaloneDataMap.
465+   void  *StandaloneData;
438466
467+   //  Path to the root directory.
439468  std::string RootPath;
440469
441-   // /  Optional on-disk store to be used for faulting-in nodes.
470+   //  Optional on-disk store to be used for faulting-in nodes.
442471  std::unique_ptr<OnDiskGraphDB> UpstreamDB;
472+ 
473+   //  The policy used to fault in data from upstream.
443474  FaultInPolicy FIPolicy;
444475
476+   //  Debug Logger.
445477  std::shared_ptr<OnDiskCASLogger> Logger;
446478};
447479
0 commit comments