Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clang/test/CAS/daemon-cas-recovery.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/// Construct a malformed CAS to recover from.
// RUN: echo "abc" | llvm-cas --cas %t/cas --make-blob --data -
// RUN: rm %t/cas/v1.1/v11.data
// RUN: rm %t/cas/v1.1/data.v1
// RUN: not llvm-cas --cas %t/cas --validate --check-hash

// RUN: env LLVM_CACHE_CAS_PATH=%t/cas LLVM_CAS_FORCE_VALIDATION=1 %clang-cache \
Expand Down
6 changes: 3 additions & 3 deletions clang/test/CAS/depscan-cas-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@
// RUN: -cc1-args -cc1 -triple x86_64-apple-macosx11.0.0 -emit-obj %s -o %t/t.o -fcas-path %t/cas
// RUN: FileCheck %s --input-file %t/cas/v1.log

// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
// CHECK: [[PID1:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
// CHECK: [[PID1]] {{[0-9]*}}: create subtrie

// Even a minimal compilation involves at least 9 records for the cache key.
// CHECK-COUNT-9: [[PID1]] {{[0-9]*}}: create record

// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}v{{[0-9]+}}.index'
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}v{{[0-9]+}}.index'
// CHECK: [[PID2:[0-9]*]] {{[0-9]*}}: mmap '{{.*}}index.v{{[0-9]+}}'
// CHECK: [[PID2]] {{[0-9]*}}: close mmap '{{.*}}index.v{{[0-9]+}}'
2 changes: 1 addition & 1 deletion clang/test/CAS/validate-once.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// RUN: rm -rf %t

// RUN: llvm-cas --cas %t/cas --ingest %s
// RUN: mv %t/cas/v1.1/v11.data %t/cas/v1.1/v11.data.bak
// RUN: mv %t/cas/v1.1/data.v1 %t/cas/v1.1/data.v1.bak

// RUN: %clang -cc1depscand -execute %{clang-daemon-dir}/%basename_t -cas-args -fcas-path %t/cas -- \
// RUN: %clang -target x86_64-apple-macos11 -I %S/Inputs \
Expand Down
2 changes: 1 addition & 1 deletion llvm/include/llvm/CAS/OnDiskDataAllocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class OnDiskDataAllocator {

/// \returns the buffer that was allocated at \p create time, with size
/// \p UserHeaderSize.
MutableArrayRef<uint8_t> getUserHeader();
MutableArrayRef<uint8_t> getUserHeader() const;

size_t size() const;
size_t capacity() const;
Expand Down
82 changes: 53 additions & 29 deletions llvm/include/llvm/CAS/OnDiskGraphDB.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
//===- OnDiskGraphDB.h ------------------------------------------*- C++ -*-===//
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This declares OnDiskGraphDB, an ondisk CAS database with a fixed length
/// hash. This is the class that implements the database storage scheme without
/// exposing the hashing algorithm.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CAS_ONDISKGRAPHDB_H
#define LLVM_CAS_ONDISKGRAPHDB_H
Expand All @@ -16,16 +23,13 @@

namespace llvm::cas::ondisk {

/// 8B reference.
/// Standard 8B reference inside OnDiskGraphDB.
class InternalRef {
public:
FileOffset getFileOffset() const { return FileOffset(getRawOffset()); }

FileOffset getFileOffset() const { return FileOffset(Data); }
uint64_t getRawData() const { return Data; }
uint64_t getRawOffset() const { return Data; }

/// Create an InternalRef by passing the raw 64-bit value \p Data directly to
/// the InternalRef constructor.
static InternalRef getFromRawData(uint64_t Data) {
  return InternalRef(Data);
}

/// Create an InternalRef from a file offset: the value returned by
/// \p Offset.get() is handed to the InternalRef constructor.
static InternalRef getFromOffset(FileOffset Offset) {
  const auto RawOffset = Offset.get();
  return InternalRef(RawOffset);
}
Expand All @@ -40,19 +44,17 @@ class InternalRef {
uint64_t Data;
};

/// 4B reference.
/// Compact 4B reference inside OnDiskGraphDB for smaller references.
class InternalRef4B {
public:
FileOffset getFileOffset() const { return FileOffset(Data); }

uint32_t getRawData() const { return Data; }

/// Shrink to 4B reference.
static std::optional<InternalRef4B> tryToShrink(InternalRef Ref) {
uint64_t Offset = Ref.getRawOffset();
uint64_t Offset = Ref.getRawData();
if (Offset > UINT32_MAX)
return std::nullopt;

return InternalRef4B(Offset);
}

Expand Down Expand Up @@ -148,10 +150,9 @@ class InternalRefArrayRef {
if (is4B()) {
auto *B = cast<const InternalRef4B *>(Begin);
return ArrayRef((const uint8_t *)B, sizeof(InternalRef4B) * Size);
} else {
auto *B = cast<const InternalRef *>(Begin);
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
}
auto *B = cast<const InternalRef *>(Begin);
return ArrayRef((const uint8_t *)B, sizeof(InternalRef) * Size);
}

InternalRefArrayRef(std::nullopt_t = std::nullopt) {
Expand All @@ -172,8 +173,6 @@ class InternalRefArrayRef {
size_t Size = 0;
};

struct OnDiskContent;

/// Reference to a node. The node's data may not be stored in the database.
/// An \p ObjectID instance can only be used with the \p OnDiskGraphDB instance
/// it came from. \p ObjectIDs from different \p OnDiskGraphDB instances are not
Expand Down Expand Up @@ -217,6 +216,7 @@ class ObjectHandle {
uint64_t Opaque;
};

/// Iterator for ObjectID.
class object_refs_iterator
: public iterator_facade_base<object_refs_iterator,
std::random_access_iterator_tag, ObjectID> {
Expand Down Expand Up @@ -294,6 +294,7 @@ class OnDiskGraphDB {
/// \returns the data part of the provided object handle.
ArrayRef<char> getObjectData(ObjectHandle Node) const;

/// \returns the range of objects referenced by the provided object handle.
object_refs_range getObjectRefs(ObjectHandle Node) const {
InternalRefArrayRef Refs = getInternalRefs(Node);
return make_range(Refs.begin(), Refs.end());
Expand All @@ -315,6 +316,13 @@ class OnDiskGraphDB {
/// Hashing function type for validation.
using HashingFuncT = function_ref<void(
ArrayRef<ArrayRef<uint8_t>>, ArrayRef<char>, SmallVectorImpl<uint8_t> &)>;

/// Validate the OnDiskGraphDB.
///
/// \param Deep if true, rehash all the objects to make sure there is no data
/// corruption in the stored objects; otherwise, just validate the structure
/// of the CAS database.
/// \param Hasher the hashing function used for objects inside the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const;

/// How to fault-in nodes if an upstream database is used.
Expand Down Expand Up @@ -347,19 +355,20 @@ class OnDiskGraphDB {
~OnDiskGraphDB();

private:
/// Forward declaration for a proxy for an ondisk index record.
struct IndexProxy;
class TempFile;
class MappedTempFile;

enum class ObjectPresence {
Missing,
InPrimaryDB,
OnlyInUpstreamDB,
};

// Check if object exists and if it is on upstream only.
Expected<ObjectPresence> getObjectPresence(ObjectID Ref,
bool CheckUpstream) const;

// \returns true if object can be found in database.
bool containsObject(ObjectID Ref, bool CheckUpstream) const {
auto Presence = getObjectPresence(Ref, CheckUpstream);
if (!Presence) {
Expand All @@ -379,46 +388,56 @@ class OnDiskGraphDB {
/// When \p load is called for a node that doesn't exist, this function tries
/// to load it from the upstream store and copy it to the primary one.
Expected<std::optional<ObjectHandle>> faultInFromUpstream(ObjectID PrimaryID);

/// Import the entire tree from upstream with \p UpstreamNode as root.
Error importFullTree(ObjectID PrimaryID, ObjectHandle UpstreamNode);
/// Import only the \p UpstreamNode.
Error importSingleNode(ObjectID PrimaryID, ObjectHandle UpstreamNode);

/// Find the IndexProxy for the hash.
Expected<IndexProxy> indexHash(ArrayRef<uint8_t> Hash);

/// Get path for creating standalone data file.
void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
SmallVectorImpl<char> &Path) const;
/// Create a standalone leaf file.
Error createStandaloneLeaf(IndexProxy &I, ArrayRef<char> Data);

Expected<MappedTempFile> createTempFile(StringRef FinalPath, uint64_t Size);

OnDiskContent getContentFromHandle(ObjectHandle H) const;

/// @name Helper functions for internal data structures.
/// @{
/// Translate an external ObjectID into an InternalRef by feeding the ID's
/// opaque data to InternalRef::getFromRawData.
static InternalRef getInternalRef(ObjectID Ref) {
  const auto Opaque = Ref.getOpaqueData();
  return InternalRef::getFromRawData(Opaque);
}

/// Translate an InternalRef into an external ObjectID by feeding the ref's
/// raw data to ObjectID::fromOpaqueData.
static ObjectID getExternalReference(InternalRef Ref) {
  const auto Raw = Ref.getRawData();
  return ObjectID::fromOpaqueData(Raw);
}

static ObjectID getExternalReference(const IndexProxy &I);

void getStandalonePath(StringRef FileSuffix, const IndexProxy &I,
SmallVectorImpl<char> &Path) const;
static InternalRef makeInternalRef(FileOffset IndexOffset);

Expected<ArrayRef<uint8_t>> getDigest(InternalRef Ref) const;

ArrayRef<uint8_t> getDigest(const IndexProxy &I) const;

Expected<IndexProxy> getIndexProxyFromRef(InternalRef Ref) const;

static InternalRef makeInternalRef(FileOffset IndexOffset);

IndexProxy
getIndexProxyFromPointer(OnDiskTrieRawHashMap::ConstOnDiskPtr P) const;

InternalRefArrayRef getInternalRefs(ObjectHandle Node) const;
/// @}

void recordStandaloneSizeIncrease(size_t SizeIncrease);
/// Get the atomic variable that keeps track of the standalone data storage size.
std::atomic<uint64_t> &standaloneStorageSize() const;

std::atomic<uint64_t> &getStandaloneStorageSize();
/// Increase the standalone data size.
void recordStandaloneSizeIncrease(size_t SizeIncrease);
/// Get the standalone data size.
uint64_t getStandaloneStorageSize() const;

// Private constructor.
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
OnDiskDataAllocator DataPool,
std::unique_ptr<OnDiskGraphDB> UpstreamDB, FaultInPolicy Policy,
Expand All @@ -434,14 +453,19 @@ class OnDiskGraphDB {
/// Data type is DataRecordHandle.
OnDiskDataAllocator DataPool;

void *StandaloneData; // a StandaloneDataMap.
// a StandaloneDataMap.
void *StandaloneData;

// Path to the root directory.
std::string RootPath;

/// Optional on-disk store to be used for faulting-in nodes.
// Optional on-disk store to be used for faulting-in nodes.
std::unique_ptr<OnDiskGraphDB> UpstreamDB;

// The policy used to fault in data from upstream.
FaultInPolicy FIPolicy;

// Debug Logger.
std::shared_ptr<OnDiskCASLogger> Logger;
};

Expand Down
17 changes: 12 additions & 5 deletions llvm/include/llvm/CAS/OnDiskKeyValueDB.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
//===- OnDiskKeyValueDB.h ---------------------------------------*- C++ -*-===//
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This declares OnDiskKeyValueDB, a key value storage database of fixed size
/// key and value.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CAS_ONDISKKEYVALUEDB_H
#define LLVM_CAS_ONDISKKEYVALUEDB_H
Expand Down Expand Up @@ -35,9 +41,7 @@ class OnDiskKeyValueDB {
Expected<std::optional<ArrayRef<char>>> get(ArrayRef<uint8_t> Key);

/// \returns Total size of stored data.
size_t getStorageSize() const {
return Cache.size();
}
size_t getStorageSize() const { return Cache.size(); }

/// \returns The percentage of space utilization of hard space limits.
///
Expand All @@ -60,7 +64,10 @@ class OnDiskKeyValueDB {
StringRef ValueName, size_t ValueSize,
std::shared_ptr<OnDiskCASLogger> Logger = nullptr);

using CheckValueT = function_ref<Error(FileOffset Offset, ArrayRef<char>)>;
/// Validate the storage with a callback \p CheckValue to check the stored
/// value.
using CheckValueT =
function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
Error validate(CheckValueT CheckValue) const;

private:
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Config/llvm-config.h.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,7 @@
coverage bugs, and to 0 otherwise. */
#cmakedefine01 LLVM_ENABLE_DEBUGLOC_TRACKING_ORIGIN

/* Define to 1 to enable LLVM OnDisk Content Addressable Storage */
#cmakedefine01 LLVM_ENABLE_ONDISK_CAS

#endif
4 changes: 0 additions & 4 deletions llvm/lib/CAS/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
if (LLVM_ENABLE_ONDISK_CAS)
add_definitions(-DLLVM_ENABLE_ONDISK_CAS=1)
endif()

add_llvm_component_library(LLVMCAS
ActionCache.cpp
ActionCaches.cpp
Expand Down
Loading