1- // ===- OnDiskGraphDB.h  ------------------------------------------*- C++ -* -===//
1+ // ===--------------------------------------------------------------------- -===//
22// 
33//  Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44//  See https://llvm.org/LICENSE.txt for license information.
55//  SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66// 
77// ===----------------------------------------------------------------------===//
8+ // 
9+ // / \file
10+ // / This declares OnDiskGraphDB, an on-disk CAS database with a fixed-length
11+ // / hash. This is the class that implements the database storage scheme without
12+ // / exposing the hashing algorithm.
13+ // 
14+ // ===----------------------------------------------------------------------===//
815
916#ifndef  LLVM_CAS_ONDISKGRAPHDB_H
1017#define  LLVM_CAS_ONDISKGRAPHDB_H 
1623
1724namespace  llvm ::cas::ondisk {
1825
19- // / 8B reference.
26+ // / Standard  8B reference inside OnDiskGraphDB .
2027class  InternalRef  {
2128public: 
22-   FileOffset getFileOffset () const  { return  FileOffset (getRawOffset ()); }
23- 
29+   FileOffset getFileOffset () const  { return  FileOffset (Data); }
2430  uint64_t  getRawData () const  { return  Data; }
25-   uint64_t  getRawOffset () const  { return  Data; }
2631
2732  static  InternalRef getFromRawData (uint64_t  Data) { return  InternalRef (Data); }
28- 
2933  static  InternalRef getFromOffset (FileOffset Offset) {
3034    return  InternalRef (Offset.get ());
3135  }
@@ -40,19 +44,17 @@ class InternalRef {
4044  uint64_t  Data;
4145};
4246
43- // / 4B reference.
47+ // / Compact  4B reference inside OnDiskGraphDB for smaller references .
4448class  InternalRef4B  {
4549public: 
4650  FileOffset getFileOffset () const  { return  FileOffset (Data); }
47- 
4851  uint32_t  getRawData () const  { return  Data; }
4952
5053  // / Shrink to a 4B reference; \returns std::nullopt if the value exceeds UINT32_MAX.
5154  static  std::optional<InternalRef4B> tryToShrink (InternalRef Ref) {
52-     uint64_t  Offset = Ref.getRawOffset ();
55+     uint64_t  Offset = Ref.getRawData ();
5356    if  (Offset > UINT32_MAX)
5457      return  std::nullopt ;
55- 
5658    return  InternalRef4B (Offset);
5759  }
5860
@@ -148,10 +150,9 @@ class InternalRefArrayRef {
148150    if  (is4B ()) {
149151      auto  *B = cast<const  InternalRef4B *>(Begin);
150152      return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef4B) * Size);
151-     } else  {
152-       auto  *B = cast<const  InternalRef *>(Begin);
153-       return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef) * Size);
154153    }
154+     auto  *B = cast<const  InternalRef *>(Begin);
155+     return  ArrayRef ((const  uint8_t  *)B, sizeof (InternalRef) * Size);
155156  }
156157
157158  InternalRefArrayRef (std::nullopt_t  = std::nullopt ) {
@@ -172,8 +173,6 @@ class InternalRefArrayRef {
172173  size_t  Size = 0 ;
173174};
174175
175- struct  OnDiskContent ;
176- 
177176// / Reference to a node. The node's data may not be stored in the database.
178177// / An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance
179178// / it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not
@@ -217,6 +216,7 @@ class ObjectHandle {
217216  uint64_t  Opaque;
218217};
219218
219+ // / Iterator for ObjectID.
220220class  object_refs_iterator 
221221    : public iterator_facade_base<object_refs_iterator,
222222                                  std::random_access_iterator_tag, ObjectID> {
@@ -294,6 +294,7 @@ class OnDiskGraphDB {
294294  // / \returns the data part of the provided object handle.
295295  ArrayRef<char > getObjectData (ObjectHandle Node) const ;
296296
297+   // / \returns the range of objects referenced by the provided object handle.
297298  object_refs_range getObjectRefs (ObjectHandle Node) const  {
298299    InternalRefArrayRef Refs = getInternalRefs (Node);
299300    return  make_range (Refs.begin (), Refs.end ());
@@ -315,6 +316,13 @@ class OnDiskGraphDB {
315316  // / Hashing function type for validation.
316317  using  HashingFuncT = function_ref<void (
317318      ArrayRef<ArrayRef<uint8_t >>, ArrayRef<char >, SmallVectorImpl<uint8_t > &)>;
319+ 
320+   // / Validate the OnDiskGraphDB.
321+   // /
322+   // / \param Deep if true, rehash all the objects to ensure no data
323+   // / corruption in stored objects, otherwise just validate the structure of
324+   // / CAS database.
325+   // / \param Hasher is the hashing function used for objects inside CAS.
318326  Error validate (bool  Deep, HashingFuncT Hasher) const ;
319327
320328  // / How to fault-in nodes if an upstream database is used.
@@ -347,19 +355,20 @@ class OnDiskGraphDB {
347355  ~OnDiskGraphDB ();
348356
349357private: 
358+   // / Forward declaration for a proxy for an ondisk index record.
350359  struct  IndexProxy ;
351-   class  TempFile ;
352-   class  MappedTempFile ;
353360
354361  enum  class  ObjectPresence  {
355362    Missing,
356363    InPrimaryDB,
357364    OnlyInUpstreamDB,
358365  };
359366
367+   // / Check whether the object is missing, in the primary DB, or only in the upstream DB.
360368  Expected<ObjectPresence> getObjectPresence (ObjectID Ref,
361369                                             bool  CheckUpstream) const ;
362370
371+   // / \returns true if object can be found in database.
363372  bool  containsObject (ObjectID Ref, bool  CheckUpstream) const  {
364373    auto  Presence = getObjectPresence (Ref, CheckUpstream);
365374    if  (!Presence) {
@@ -379,46 +388,56 @@ class OnDiskGraphDB {
379388  // / When \p load is called for a node that doesn't exist, this function tries
380389  // / to load it from the upstream store and copy it to the primary one.
381390  Expected<std::optional<ObjectHandle>> faultInFromUpstream (ObjectID PrimaryID);
391+ 
392+   // / Import the entire tree from upstream with \p UpstreamNode as root.
382393  Error importFullTree (ObjectID PrimaryID, ObjectHandle UpstreamNode);
394+   // / Import only the \p UpstreamNode.
383395  Error importSingleNode (ObjectID PrimaryID, ObjectHandle UpstreamNode);
384396
397+   // / Find the IndexProxy for the hash.
385398  Expected<IndexProxy> indexHash (ArrayRef<uint8_t > Hash);
386399
400+   // / Get path for creating standalone data file.
401+   void  getStandalonePath (StringRef FileSuffix, const  IndexProxy &I,
402+                          SmallVectorImpl<char > &Path) const ;
403+   // / Create a standalone leaf file.
387404  Error createStandaloneLeaf (IndexProxy &I, ArrayRef<char > Data);
388405
389-   Expected<MappedTempFile> createTempFile (StringRef FinalPath, uint64_t  Size);
390- 
391-   OnDiskContent getContentFromHandle (ObjectHandle H) const ;
392- 
406+   // / @name Helper functions for internal data structures.
407+   // / @{
393408  static  InternalRef getInternalRef (ObjectID Ref) {
394409    return  InternalRef::getFromRawData (Ref.getOpaqueData ());
395410  }
411+ 
396412  static  ObjectID getExternalReference (InternalRef Ref) {
397413    return  ObjectID::fromOpaqueData (Ref.getRawData ());
398414  }
399415
400416  static  ObjectID getExternalReference (const  IndexProxy &I);
401417
402-   void  getStandalonePath (StringRef FileSuffix, const  IndexProxy &I,
403-                          SmallVectorImpl<char > &Path) const ;
418+   static  InternalRef makeInternalRef (FileOffset IndexOffset);
404419
405420  Expected<ArrayRef<uint8_t >> getDigest (InternalRef Ref) const ;
421+ 
406422  ArrayRef<uint8_t > getDigest (const  IndexProxy &I) const ;
407423
408424  Expected<IndexProxy> getIndexProxyFromRef (InternalRef Ref) const ;
409425
410-   static  InternalRef makeInternalRef (FileOffset IndexOffset);
411- 
412426  IndexProxy
413427  getIndexProxyFromPointer (OnDiskTrieRawHashMap::ConstOnDiskPtr P) const ;
414428
415429  InternalRefArrayRef getInternalRefs (ObjectHandle Node) const ;
430+   // / @}
416431
417-   void  recordStandaloneSizeIncrease (size_t  SizeIncrease);
432+   // / Get the atomic variable that keeps track of the standalone data storage size.
433+   std::atomic<uint64_t > &standaloneStorageSize () const ;
418434
419-   std::atomic<uint64_t > &getStandaloneStorageSize ();
435+   // / Increase the standalone data size.
436+   void  recordStandaloneSizeIncrease (size_t  SizeIncrease);
437+   // / Get the standalone data size.
420438  uint64_t  getStandaloneStorageSize () const ;
421439
440+   //  Private constructor.
422441  OnDiskGraphDB (StringRef RootPath, OnDiskTrieRawHashMap Index,
423442                OnDiskDataAllocator DataPool,
424443                std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
@@ -434,14 +453,19 @@ class OnDiskGraphDB {
434453  // / Data type is DataRecordHandle.
435454  OnDiskDataAllocator DataPool;
436455
437-   void  *StandaloneData; //  a StandaloneDataMap.
456+   // / A StandaloneDataMap.
457+   void  *StandaloneData;
438458
459+   // / Path to the root directory.
439460  std::string RootPath;
440461
441462  // / Optional on-disk store to be used for faulting-in nodes.
442463  std::unique_ptr<OnDiskGraphDB> UpstreamDB;
464+ 
465+   // / The policy used to fault in data from upstream.
443466  FaultInPolicy FIPolicy;
444467
468+   // / Debug Logger.
445469  std::shared_ptr<OnDiskCASLogger> Logger;
446470};
447471
0 commit comments