diff --git a/CHANGELOG.md b/CHANGELOG.md index c0f962fba52..b522dc133bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ ### Fixed * ([#????](https://github.com/realm/realm-core/issues/????), since v?.?.?) * Fix some client resets (such as migrating to flexible sync) potentially failing with AutoClientResetFailed if a new client reset condition (such as rolling back a flexible sync migration) occurred before the first one completed. ([PR #7542](https://github.com/realm/realm-core/pull/7542), since v13.11.0) +* Encrypted files on Windows had a maximum size of 2GB even on x64 due to internal usage of `off_t`, which is a 32-bit type on 64-bit Windows ([PR #7698](https://github.com/realm/realm-core/pull/7698), since the introduction of encryption support on Windows in v3.0.0). +* The encryption code no longer behaves differently depending on the system page size, which should entirely eliminate a recurring source of bugs related to copying encrypted Realm files between platforms with different page sizes. One known outstanding bug was ([RNET-1141](https://github.com/realm/realm-dotnet/issues/3592)), where opening files on a system with a larger page size than the writing system would attempt to read sections of the file which had never been written to ([PR #7698](https://github.com/realm/realm-core/pull/7698)). +* There were several complicated scenarios which could result in stale reads from encrypted files in multiprocess scenarios. These were very difficult to hit and would typically lead to a crash, either due to an assertion failure or DecryptionFailure being thrown ([PR #7698](https://github.com/realm/realm-core/pull/7698), since v13.9.0). +* Encrypted files have some benign data races where we can memcpy a block of memory while another thread is writing to a limited range of it. It is logically impossible to ever read from that range when this happens, but Thread Sanitizer quite reasonably complains about this. 
We now perform a slower operation when running with TSan which avoids this benign race ([PR #7698](https://github.com/realm/realm-core/pull/7698)). +* Tokenizing strings for full-text search could pass values outside the range [-1, 255] to `isspace()`, which is undefined behavior ([PR #7698](https://github.com/realm/realm-core/pull/7698), since the introduction of FTS in v13.0.0). ### Breaking changes * Any `stitch_` prefixed fields in the `BsonDocument` returned from `app::User::custom_data()` are being renamed on the server to have a `baas_` prefix instead ([PR #7769](https://github.com/realm/realm-core/pull/7769)). @@ -21,6 +26,7 @@ * Removed references to `stitch_` fields in access tokens in sync unit tests ([PR #7769](https://github.com/realm/realm-core/pull/7769)). * Added back iOS simulator testing to evergreen after Jenkins went away ([PR #7758](https://github.com/realm/realm-core/pull/7758)). * `realm-trawler -c` did not work on Realm using SyncClient history ([PR #7734](https://github.com/realm/realm-core/pull/7734)). +* `File::Map`'s move constructor and assignment operator left `m_fd` unchanged, which appears to have never actually resulted in problems with how it was used ([PR #7698](https://github.com/realm/realm-core/pull/7698)). 
---------------------------------------------- diff --git a/src/realm/alloc.cpp b/src/realm/alloc.cpp index f15cddfdffd..7f9817e2fbe 100644 --- a/src/realm/alloc.cpp +++ b/src/realm/alloc.cpp @@ -119,9 +119,7 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re RefTranslation& txl = ref_translation_ptr[idx]; size_t offset = ref - get_section_base(idx); char* addr = txl.mapping_addr + offset; -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping, nullptr); -#endif + util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping); auto size = NodeHeader::get_byte_size_from_header(addr); bool crosses_mapping = offset + size > (1 << section_shift); // Move the limit on use of the existing primary mapping. @@ -135,27 +133,21 @@ char* Allocator::translate_less_critical(RefTranslation* ref_translation_ptr, re } if (REALM_LIKELY(!crosses_mapping)) { // Array fits inside primary mapping, no new mapping needed. -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, size, txl.encrypted_mapping, nullptr); -#endif + util::encryption_read_barrier(addr, size, txl.encrypted_mapping); return addr; } - else { - // we need a cross-over mapping. If one is already established, use that. 
- auto xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_acquire); - if (!xover_mapping_addr) { - // we need to establish a xover mapping - or wait for another thread to finish - // establishing one: - const_cast(this)->get_or_add_xover_mapping(txl, idx, offset, size); - // reload (can be relaxed since the call above synchronizes on a mutex) - xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_relaxed); - } - // array is now known to be inside the established xover mapping: - addr = xover_mapping_addr + (offset - txl.xover_mapping_base); -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, size, txl.xover_encrypted_mapping, nullptr); -#endif - return addr; + // we need a cross-over mapping. If one is already established, use that. + auto xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_acquire); + if (!xover_mapping_addr) { + // we need to establish a xover mapping - or wait for another thread to finish + // establishing one: + const_cast(this)->get_or_add_xover_mapping(txl, idx, offset, size); + // reload (can be relaxed since the call above synchronizes on a mutex) + xover_mapping_addr = txl.xover_mapping_addr.load(std::memory_order_relaxed); } + // array is now known to be inside the established xover mapping: + addr = xover_mapping_addr + (offset - txl.xover_mapping_base); + util::encryption_read_barrier(addr, size, txl.xover_encrypted_mapping); + return addr; } } // namespace realm diff --git a/src/realm/alloc.hpp b/src/realm/alloc.hpp index f3515e36826..2a18e93ab1d 100644 --- a/src/realm/alloc.hpp +++ b/src/realm/alloc.hpp @@ -171,7 +171,7 @@ class Allocator { // into equal chunks. 
struct RefTranslation { char* mapping_addr; - uint64_t cookie; + uint64_t cookie = 0x1234567890; std::atomic lowest_possible_xover_offset = 0; // member 'xover_mapping_addr' is used for memory synchronization of the fields @@ -183,14 +183,12 @@ class Allocator { #if REALM_ENABLE_ENCRYPTION util::EncryptedFileMapping* encrypted_mapping = nullptr; util::EncryptedFileMapping* xover_encrypted_mapping = nullptr; +#else + static inline util::EncryptedFileMapping* const encrypted_mapping = nullptr; + static inline util::EncryptedFileMapping* const xover_encrypted_mapping = nullptr; #endif - explicit RefTranslation(char* addr) + explicit RefTranslation(char* addr = nullptr) : mapping_addr(addr) - , cookie(0x1234567890) - { - } - RefTranslation() - : RefTranslation(nullptr) { } ~RefTranslation() @@ -222,7 +220,7 @@ class Allocator { }; // This pointer may be changed concurrently with access, so make sure it is // atomic! - std::atomic m_ref_translation_ptr; + std::atomic m_ref_translation_ptr{nullptr}; /// The specified size must be divisible by 8, and must not be /// zero. @@ -252,7 +250,7 @@ class Allocator { char* translate_critical(RefTranslation*, ref_type ref) const noexcept; char* translate_less_critical(RefTranslation*, ref_type ref) const noexcept; virtual void get_or_add_xover_mapping(RefTranslation&, size_t, size_t, size_t) = 0; - Allocator() noexcept; + Allocator() noexcept = default; size_t get_section_index(size_t pos) const noexcept; inline size_t get_section_base(size_t index) const noexcept; @@ -271,11 +269,9 @@ class Allocator { // used to detect if the allocator (and owning structure, e.g. Table) // is recycled. Mismatch on this counter will cause accesors // lower in the hierarchy to throw if access is attempted. 
- std::atomic m_content_versioning_counter; - - std::atomic m_storage_versioning_counter; - - std::atomic m_instance_versioning_counter; + std::atomic m_content_versioning_counter{0}; + std::atomic m_storage_versioning_counter{0}; + std::atomic m_instance_versioning_counter{0}; inline uint_fast64_t get_storage_version(uint64_t instance_version) { @@ -547,14 +543,6 @@ inline bool Allocator::is_read_only(ref_type ref) const noexcept return ref < m_baseline.load(std::memory_order_relaxed); } -inline Allocator::Allocator() noexcept -{ - m_content_versioning_counter = 0; - m_storage_versioning_counter = 0; - m_instance_versioning_counter = 0; - m_ref_translation_ptr = nullptr; -} - // performance critical part of the translation process. Less critical code is in translate_less_critical. inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, ref_type ref) const noexcept { @@ -566,30 +554,23 @@ inline char* Allocator::translate_critical(RefTranslation* ref_translation_ptr, if (REALM_LIKELY(offset < lowest_possible_xover_offset)) { // the lowest possible xover offset may grow concurrently, but that will not affect this code path char* addr = txl.mapping_addr + offset; -#if REALM_ENABLE_ENCRYPTION - realm::util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping, - NodeHeader::get_byte_size_from_header); -#endif + util::encryption_read_barrier(addr, NodeHeader::header_size, txl.encrypted_mapping); + size_t size = NodeHeader::get_byte_size_from_header(addr); + util::encryption_read_barrier(addr, size, txl.encrypted_mapping); return addr; } - else { - // the lowest possible xover offset may grow concurrently, but that will be handled inside the call - return translate_less_critical(ref_translation_ptr, ref); - } + // the lowest possible xover offset may grow concurrently, but that will be handled inside the call + return translate_less_critical(ref_translation_ptr, ref); } realm::util::terminate("Invalid ref translation 
entry", __FILE__, __LINE__, txl.cookie, 0x1234567890, ref, idx); - return nullptr; } inline char* Allocator::translate(ref_type ref) const noexcept { - auto ref_translation_ptr = m_ref_translation_ptr.load(std::memory_order_acquire); - if (REALM_LIKELY(ref_translation_ptr)) { - return translate_critical(ref_translation_ptr, ref); - } - else { - return do_translate(ref); + if (auto ptr = m_ref_translation_ptr.load(std::memory_order_acquire); REALM_LIKELY(ptr)) { + return translate_critical(ptr, ref); } + return do_translate(ref); } diff --git a/src/realm/alloc_slab.cpp b/src/realm/alloc_slab.cpp index af175d1965b..c421e1ce3c8 100644 --- a/src/realm/alloc_slab.cpp +++ b/src/realm/alloc_slab.cpp @@ -16,15 +16,13 @@ * **************************************************************************/ -#include -#include -#include #include -#include -#include -#include #include +#include #include +#include +#include +#include #if REALM_DEBUG #include @@ -35,13 +33,13 @@ #include #endif -#include #include -#include -#include +#include #include +#include #include #include +#include #include using namespace realm; @@ -85,7 +83,7 @@ util::File& SlabAlloc::get_file() } -const SlabAlloc::Header SlabAlloc::empty_file_header = { +inline constexpr SlabAlloc::Header SlabAlloc::empty_file_header = { {0, 0}, // top-refs {'T', '-', 'D', 'B'}, {0, 0}, // undecided file format @@ -131,8 +129,7 @@ SlabAlloc::Slab::~Slab() void SlabAlloc::detach(bool keep_file_open) noexcept { - delete[] m_ref_translation_ptr; - m_ref_translation_ptr.store(nullptr); + delete[] m_ref_translation_ptr.exchange(nullptr); m_translation_table_size = 0; set_read_only(true); purge_old_mappings(static_cast(-1), 0); @@ -164,9 +161,6 @@ void SlabAlloc::detach(bool keep_file_open) noexcept // placed correctly (logically) after the end of the file. 
m_slabs.clear(); clear_freelists(); -#if REALM_ENABLE_ENCRYPTION - m_realm_file_info = nullptr; -#endif m_attach_mode = attach_None; } @@ -661,7 +655,7 @@ int SlabAlloc::get_committed_file_format_version() noexcept // if we have mapped a file, m_mappings will have at least one mapping and // the first will be to the start of the file. Don't come here, if we're // just attaching a buffer. They don't have mappings. - realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); + util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); } } const Header& header = *reinterpret_cast(m_data); @@ -805,10 +799,6 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ // the call below to set_encryption_key. m_file.set_encryption_key(cfg.encryption_key); - note_reader_start(this); - util::ScopeExit reader_end_guard([this]() noexcept { - note_reader_end(this); - }); size_t size = 0; // The size of a database file must not exceed what can be encoded in // size_t. @@ -840,26 +830,17 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ if (size == 0) { if (REALM_UNLIKELY(cfg.read_only)) throw InvalidDatabase("Read-only access to empty Realm file", path); - - size_t initial_size = page_size(); - // exFAT does not allocate a unique id for the file until it is non-empty. It must be - // valid at this point because File::get_unique_id() is used to distinguish - // mappings_for_file in the encryption layer. So the prealloc() is required before - // interacting with the encryption layer in File::write(). 
- // Pre-alloc initial space - m_file.prealloc(initial_size); // Throws - // seek() back to the start of the file in preparation for writing the header - // This sequence of File operations is protected from races by - // DB::m_controlmutex, so we know we are the only ones operating on the file - m_file.seek(0); + // We want all non-streaming files to be a multiple of the page size + // to simplify memory mapping, so just pre-reserve the required space now + m_file.prealloc(page_size()); // Throws const char* data = reinterpret_cast(&empty_file_header); - m_file.write(data, sizeof empty_file_header); // Throws + m_file.write(0, data, sizeof empty_file_header); // Throws bool disable_sync = get_disable_sync_to_disk() || cfg.disable_sync; if (!disable_sync) m_file.sync(); // Throws - size = initial_size; + size = size_t(m_file.get_size()); } ref_type top_ref = read_and_validate_header(m_file, path, size, cfg.session_initiator, m_write_observer); @@ -883,12 +864,9 @@ ref_type SlabAlloc::attach_file(const std::string& path, Config& cfg, util::Writ update_reader_view(size); REALM_ASSERT(m_mappings.size()); m_data = m_mappings[0].primary_mapping.get_addr(); - realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); + util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); dg.release(); // Do not detach fcg.release(); // Do not close -#if REALM_ENABLE_ENCRYPTION - m_realm_file_info = util::get_file_info_for_file(m_file); -#endif return top_ref; } @@ -905,40 +883,20 @@ void SlabAlloc::convert_from_streaming_form(ref_type top_ref) { File::Map
writable_map(m_file, File::access_ReadWrite, sizeof(Header)); // Throws Header& writable_header = *writable_map.get_addr(); - realm::util::encryption_read_barrier_for_write(writable_map, 0); + util::encryption_read_barrier(writable_map, 0); writable_header.m_top_ref[1] = top_ref; writable_header.m_file_format[1] = writable_header.m_file_format[0]; realm::util::encryption_write_barrier(writable_map, 0); writable_map.sync(); - realm::util::encryption_read_barrier_for_write(writable_map, 0); + util::encryption_read_barrier(writable_map, 0); writable_header.m_flags |= flags_SelectBit; realm::util::encryption_write_barrier(writable_map, 0); writable_map.sync(); - realm::util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); + util::encryption_read_barrier(m_mappings[0].primary_mapping, 0, sizeof(Header)); } } -void SlabAlloc::note_reader_start(const void* reader_id) -{ -#if REALM_ENABLE_ENCRYPTION - if (m_realm_file_info) - util::encryption_note_reader_start(*m_realm_file_info, reader_id); -#else - static_cast(reader_id); -#endif -} - -void SlabAlloc::note_reader_end(const void* reader_id) noexcept -{ -#if REALM_ENABLE_ENCRYPTION - if (m_realm_file_info) - util::encryption_note_reader_end(*m_realm_file_info, reader_id); -#else - static_cast(reader_id); -#endif -} - ref_type SlabAlloc::attach_buffer(const char* data, size_t size) { // ExceptionSafety: If this function throws, it must leave the allocator in @@ -1009,8 +967,8 @@ ref_type SlabAlloc::read_and_validate_header(util::File& file, const std::string { try { // we'll read header and (potentially) footer - File::Map map_header(file, File::access_ReadOnly, sizeof(Header), 0, write_observer); - realm::util::encryption_read_barrier(map_header, 0, sizeof(Header)); + File::Map map_header(file, File::access_ReadOnly, sizeof(Header), write_observer); + util::encryption_read_barrier(map_header, 0, sizeof(Header)); auto header = reinterpret_cast(map_header.get_addr()); File::Map map_footer; @@ 
-1020,12 +978,12 @@ ref_type SlabAlloc::read_and_validate_header(util::File& file, const std::string size_t footer_page_base = footer_ref & ~(page_size() - 1); size_t footer_offset = footer_ref - footer_page_base; map_footer = File::Map(file, footer_page_base, File::access_ReadOnly, - sizeof(StreamingFooter) + footer_offset, 0, write_observer); - realm::util::encryption_read_barrier(map_footer, footer_offset, sizeof(StreamingFooter)); + sizeof(StreamingFooter) + footer_offset, write_observer); + util::encryption_read_barrier(map_footer, footer_offset, sizeof(StreamingFooter)); footer = reinterpret_cast(map_footer.get_addr() + footer_offset); } - auto top_ref = validate_header(header, footer, size, path, file.get_encryption_key() != nullptr); // Throws + auto top_ref = validate_header(header, footer, size, path, file.get_encryption() != nullptr); // Throws if (session_initiator && is_file_on_streaming_form(*header)) { // Don't compare file format version fields as they are allowed to differ. 
@@ -1278,10 +1236,10 @@ void SlabAlloc::update_reader_view(size_t file_size) const size_t section_size = std::min(1 << section_shift, file_size - section_start_offset); if (section_size == (1 << section_shift)) { new_mappings.push_back({util::File::Map(m_file, section_start_offset, File::access_ReadOnly, - section_size, 0, m_write_observer)}); + section_size, m_write_observer)}); } else { - new_mappings.push_back({util::File::Map()}); + new_mappings.emplace_back(); auto& mapping = new_mappings.back().primary_mapping; bool reserved = mapping.try_reserve(m_file, File::access_ReadOnly, 1 << section_shift, section_start_offset, m_write_observer); @@ -1291,7 +1249,7 @@ void SlabAlloc::update_reader_view(size_t file_size) throw std::bad_alloc(); } else { - new_mappings.back().primary_mapping.map(m_file, File::access_ReadOnly, section_size, 0, + new_mappings.back().primary_mapping.map(m_file, File::access_ReadOnly, section_size, section_start_offset, m_write_observer); } } @@ -1352,16 +1310,9 @@ void SlabAlloc::update_reader_view(size_t file_size) void SlabAlloc::schedule_refresh_of_outdated_encrypted_pages() { #if REALM_ENABLE_ENCRYPTION - // callers must already hold m_mapping_mutex - for (auto& e : m_mappings) { - if (auto m = e.primary_mapping.get_encrypted_mapping()) { - encryption_mark_pages_for_IV_check(m); - } - if (auto m = e.xover_mapping.get_encrypted_mapping()) { - encryption_mark_pages_for_IV_check(m); - } + if (auto encryption = m_file.get_encryption()) { + encryption->mark_data_as_possibly_stale(); } - // unsafe to do outside writing thread: verify(); #endif // REALM_ENABLE_ENCRYPTION } @@ -1457,7 +1408,7 @@ void SlabAlloc::get_or_add_xover_mapping(RefTranslation& txl, size_t index, size auto end_offset = file_offset + size; auto mapping_file_offset = file_offset & ~(_page_size - 1); auto minimal_mapping_size = end_offset - mapping_file_offset; - util::File::Map mapping(m_file, mapping_file_offset, File::access_ReadOnly, minimal_mapping_size, 0, + 
util::File::Map mapping(m_file, mapping_file_offset, File::access_ReadOnly, minimal_mapping_size, m_write_observer); map_entry->xover_mapping = std::move(mapping); } @@ -1553,7 +1504,7 @@ void SlabAlloc::resize_file(size_t new_file_size) m_file.prealloc(new_file_size); // Throws // resizing is done based on the logical file size. It is ok for the file // to actually be bigger, but never smaller. - REALM_ASSERT(new_file_size <= static_cast(m_file.get_size())); + REALM_ASSERT_EX(new_file_size <= static_cast(m_file.get_size()), new_file_size, m_file.get_size()); bool disable_sync = get_disable_sync_to_disk() || m_cfg.disable_sync; if (!disable_sync) diff --git a/src/realm/alloc_slab.hpp b/src/realm/alloc_slab.hpp index e1ad8a0ca9f..df7cf413b9e 100644 --- a/src/realm/alloc_slab.hpp +++ b/src/realm/alloc_slab.hpp @@ -19,20 +19,18 @@ #ifndef REALM_ALLOC_SLAB_HPP #define REALM_ALLOC_SLAB_HPP +#include #include // unint8_t etc -#include #include -#include -#include #include +#include +#include #include #include #include -#include #include #include -#include #include namespace realm { @@ -41,10 +39,6 @@ namespace realm { class Group; class GroupWriter; -namespace util { -struct SharedFileInfo; -} // namespace util - /// Thrown by Group and DB constructors if the specified file /// (or memory buffer) does not appear to contain a valid Realm /// database. @@ -363,11 +357,6 @@ class SlabAlloc : public Allocator { /// Returns total amount of slab for all slab allocators static size_t get_total_slab_size() noexcept; - /// Hooks used to keep the encryption layer informed of the start and stop - /// of transactions. - void note_reader_start(const void* reader_id); - void note_reader_end(const void* reader_id) noexcept; - /// Read the header (and possibly footer) from the file, returning the top ref if it's valid and throwing /// InvalidDatabase otherwise. 
static ref_type read_and_validate_header(util::File& file, const std::string& path, size_t size, @@ -656,7 +645,6 @@ class SlabAlloc : public Allocator { uint64_t m_youngest_live_version = 1; std::mutex m_mapping_mutex; util::File m_file; - util::SharedFileInfo* m_realm_file_info = nullptr; // vectors where old mappings, are held from deletion to ensure translations are // kept open and ref->ptr translations work for other threads.. std::vector m_old_mappings; diff --git a/src/realm/db.cpp b/src/realm/db.cpp index e85824b8458..b27cac7134a 100644 --- a/src/realm/db.cpp +++ b/src/realm/db.cpp @@ -776,7 +776,7 @@ class DB::FileVersionManager final : public DB::VersionManager { auto new_size = static_cast(m_file.get_size()); REALM_ASSERT(new_size > size); size = new_size; - m_reader_map.remap(m_file, File::access_ReadWrite, size, File::map_NoSync); + m_reader_map.remap(m_file, File::access_ReadWrite, size); m_info = m_reader_map.get_addr(); std::lock_guard lock(m_mutex); @@ -820,7 +820,7 @@ class DB::FileVersionManager final : public DB::VersionManager { }; // adapter class for marking/observing encrypted writes -class DB::EncryptionMarkerObserver : public util::WriteMarker, public util::WriteObserver { +class DB::EncryptionMarkerObserver final : public util::WriteMarker, public util::WriteObserver { public: EncryptionMarkerObserver(DB::VersionManager& vm) : vm(vm) @@ -840,7 +840,7 @@ class DB::EncryptionMarkerObserver : public util::WriteMarker, public util::Writ } ++calls_since_last_writer_observed; constexpr size_t max_calls = 5; // an arbitrary handful, > 1 - return (calls_since_last_writer_observed >= max_calls); + return calls_since_last_writer_observed >= max_calls; } void mark(uint64_t pos) override { @@ -985,7 +985,7 @@ void DB::open(const std::string& path, const DBOptions& options) // get the exclusive lock because we hold it, and hence were // waiting for the shared lock instead, to observe and use an // old lock file. 
- m_file_map.map(m_file, File::access_ReadWrite, sizeof(SharedInfo), File::map_NoSync); // Throws + m_file_map.map(m_file, File::access_ReadWrite, sizeof(SharedInfo)); // Throws File::UnmapGuard fug(m_file_map); SharedInfo* info = m_file_map.get_addr(); @@ -1050,7 +1050,7 @@ void DB::open(const std::string& path, const DBOptions& options) // the SharedInfo struct, or less if the file is smaller. We know that // we have at least one byte, and that is enough to read the // `init_complete` flag. - m_file_map.map(m_file, File::access_ReadWrite, info_size, File::map_NoSync); + m_file_map.map(m_file, File::access_ReadWrite, info_size); File::UnmapGuard fug_1(m_file_map); SharedInfo* info = m_file_map.get_addr(); @@ -1187,20 +1187,11 @@ void DB::open(const std::string& path, const DBOptions& options) // From here on, if we fail in any way, we must detach the // allocator. SlabAlloc::DetachGuard alloc_detach_guard(alloc); - alloc.note_reader_start(this); - // must come after the alloc detach guard - auto reader_end_guard = make_scope_exit([this, &alloc]() noexcept { - alloc.note_reader_end(this); - }); // Check validity of top array (to give more meaningful errors // early) if (top_ref) { try { - alloc.note_reader_start(this); - auto reader_end_guard = make_scope_exit([&]() noexcept { - alloc.note_reader_end(this); - }); Array top{alloc}; top.init_from_ref(top_ref); Group::validate_top_array(top, alloc); @@ -1618,7 +1609,7 @@ void DB::create_new_history(std::unique_ptr repl) // Unmapping (during close()) while transactions are live, is not considered an error. There // is a potential race between unmapping during close() and any operation carried out by a live // transaction. The user must ensure that this race never happens if she uses DB::close(). 
-bool DB::compact(bool bump_version_number, util::Optional output_encryption_key) +bool DB::compact(bool bump_version_number, std::optional output_encryption_key) NO_THREAD_SAFETY_ANALYSIS // this would work except for a known limitation: "No alias analysis" where clang cannot // tell that tr->db->m_mutex is the same thing as m_mutex { @@ -1635,7 +1626,19 @@ bool DB::compact(bool bump_version_number, util::Optional output_en } auto info = m_info; Durability dura = Durability(info->durability); - const char* write_key = bool(output_encryption_key) ? *output_encryption_key : get_encryption_key(); + std::string key_buffer; + const char* write_key = nullptr; + if (output_encryption_key) { + if (*output_encryption_key) { + write_key = *output_encryption_key; + } + } +#if REALM_ENABLE_ENCRYPTION + else if (auto encryption = m_alloc.get_file().get_encryption()) { + key_buffer = encryption->get_key(); + write_key = key_buffer.data(); + } +#endif { std::unique_lock lock(m_controlmutex); // Throws auto t1 = std::chrono::steady_clock::now(); @@ -1680,9 +1683,7 @@ bool DB::compact(bool bump_version_number, util::Optional output_en catch (...) { // If writing the compact version failed in any way, delete the partially written file to clean up disk // space. This is so that we don't fail with 100% disk space used when compacting on a mostly full disk. - if (File::exists(tmp_path)) { - File::remove(tmp_path); - } + File::try_remove(tmp_path); throw; } // if we've written a file with a bumped version number, we need to update the lock file to match. 
@@ -1726,7 +1727,7 @@ bool DB::compact(bool bump_version_number, util::Optional output_en return true; } -void DB::write_copy(StringData path, const char* output_encryption_key) +void DB::write_copy(std::string_view path, const char* output_encryption_key) { auto tr = start_read(); if (auto hist = tr->get_history()) { diff --git a/src/realm/db.hpp b/src/realm/db.hpp index 1a8a1b67461..e46ba6742c3 100644 --- a/src/realm/db.hpp +++ b/src/realm/db.hpp @@ -198,11 +198,6 @@ class DB : public std::enable_shared_from_this { return m_db_path; } - const char* get_encryption_key() const noexcept - { - return m_alloc.m_file.get_encryption_key(); - } - #ifdef REALM_DEBUG /// Deprecated method, only called from a unit test /// @@ -332,10 +327,10 @@ class DB : public std::enable_shared_from_this { /// the file to the new 64 byte key. /// /// WARNING: Compact() is not thread-safe with respect to a concurrent close() - bool compact(bool bump_version_number = false, util::Optional output_encryption_key = util::none) + bool compact(bool bump_version_number = false, std::optional output_encryption_key = util::none) REQUIRES(!m_mutex); - void write_copy(StringData path, const char* output_encryption_key) REQUIRES(!m_mutex); + void write_copy(std::string_view path, const char* output_encryption_key) REQUIRES(!m_mutex); #ifdef REALM_DEBUG void test_ringbuf(); diff --git a/src/realm/exec/realm_decrypt.cpp b/src/realm/exec/realm_decrypt.cpp index 134bc0d1818..2c647fb9dac 100644 --- a/src/realm/exec/realm_decrypt.cpp +++ b/src/realm/exec/realm_decrypt.cpp @@ -13,13 +13,13 @@ constexpr size_t block_size = 4096; int main(int argc, const char* argv[]) { if (argc > 3) { - const uint8_t* key_ptr = nullptr; + const char* key_ptr = nullptr; char key[64]; std::string outfilename = "out.realm"; for (int curr_arg = 1; curr_arg < argc; curr_arg++) { if (strcmp(argv[curr_arg], "--key") == 0) { hex_to_bin(argv[curr_arg + 1], key); - key_ptr = reinterpret_cast(key); + key_ptr = key; curr_arg++; } else 
if (strcmp(argv[curr_arg], "--out") == 0) { @@ -36,7 +36,7 @@ int main(int argc, const char* argv[]) auto size = (off_t)file.get_size(); decltype(size) pos = 0; util::AESCryptor cryptor(key_ptr); - cryptor.set_file_size(size); + cryptor.set_data_size(size); while (pos < size) { char buf[block_size]; cryptor.try_read_block(file.get_descriptor(), pos, buf); diff --git a/src/realm/group.cpp b/src/realm/group.cpp index 40fab4497f8..1938b2e54fb 100644 --- a/src/realm/group.cpp +++ b/src/realm/group.cpp @@ -984,10 +984,6 @@ void Group::write(File& file, const char* encryption_key, uint_fast64_t version_ file.set_encryption_key(encryption_key); - // Force the file system to allocate a node so we get a stable unique id. - // See File::get_unique_id(). This is used to distinguish encrypted mappings. - file.resize(1); - // The aim is that the buffer size should be at least 1/256 of needed size but less than 64 Mb constexpr size_t upper_bound = 64 * 1024 * 1024; size_t min_space = std::min(get_used_space() >> 8, upper_bound); diff --git a/src/realm/group_writer.cpp b/src/realm/group_writer.cpp index 9243d23e0b6..2e6651101fa 100644 --- a/src/realm/group_writer.cpp +++ b/src/realm/group_writer.cpp @@ -151,7 +151,7 @@ bool WriteWindowMgr::MapWindow::extends_to_match(util::File& f, ref_type start_r size_t window_size = get_window_size(f, start_ref, size); m_map.sync(); m_map.unmap(); - m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref); + m_map.map(f, File::access_ReadWrite, window_size, m_base_ref); return true; } @@ -161,7 +161,7 @@ WriteWindowMgr::MapWindow::MapWindow(size_t alignment, util::File& f, ref_type s { m_base_ref = aligned_to_mmap_block(start_ref); size_t window_size = get_window_size(f, start_ref, size); - m_map.map(f, File::access_ReadWrite, window_size, 0, m_base_ref); + m_map.map(f, File::access_ReadWrite, window_size, m_base_ref); #if REALM_ENABLE_ENCRYPTION if (auto p = m_map.get_encrypted_mapping()) p->set_marker(write_marker); @@ -194,7 +194,7 
@@ char* WriteWindowMgr::MapWindow::translate(ref_type ref) void WriteWindowMgr::MapWindow::encryption_read_barrier(void* start_addr, size_t size) { - realm::util::encryption_read_barrier_for_write(start_addr, size, m_map.get_encrypted_mapping()); + util::encryption_read_barrier_for_write(start_addr, size, m_map.get_encrypted_mapping()); } void WriteWindowMgr::MapWindow::encryption_write_barrier(void* start_addr, size_t size) diff --git a/src/realm/group_writer.hpp b/src/realm/group_writer.hpp index 438879114c6..c0c59145ffa 100644 --- a/src/realm/group_writer.hpp +++ b/src/realm/group_writer.hpp @@ -35,6 +35,9 @@ namespace realm { // Pre-declarations class Transaction; class SlabAlloc; +namespace util { +class WriteMarker; +} class Reachable { public: diff --git a/src/realm/node_header.hpp b/src/realm/node_header.hpp index 0251c796478..453f6100eaf 100644 --- a/src/realm/node_header.hpp +++ b/src/realm/node_header.hpp @@ -206,7 +206,6 @@ class NodeHeader { uint_least8_t width = get_width_from_header(header); WidthType wtype = get_wtype_from_header(header); size_t num_bytes = calc_byte_size(wtype, size, width); - return num_bytes; } diff --git a/src/realm/object-store/impl/realm_coordinator.cpp b/src/realm/object-store/impl/realm_coordinator.cpp index 2131a469ee7..eadd7b1638d 100644 --- a/src/realm/object-store/impl/realm_coordinator.cpp +++ b/src/realm/object-store/impl/realm_coordinator.cpp @@ -1258,7 +1258,7 @@ bool RealmCoordinator::compact() return m_db->compact(); } -void RealmCoordinator::write_copy(StringData path, const char* key) +void RealmCoordinator::write_copy(std::string_view path, const char* key) { m_db->write_copy(path, key); } diff --git a/src/realm/object-store/impl/realm_coordinator.hpp b/src/realm/object-store/impl/realm_coordinator.hpp index 3608e9c8596..3485cbfff7c 100644 --- a/src/realm/object-store/impl/realm_coordinator.hpp +++ b/src/realm/object-store/impl/realm_coordinator.hpp @@ -200,7 +200,7 @@ class RealmCoordinator : public 
std::enable_shared_from_this, void close(); bool compact(); - void write_copy(StringData path, const char* key); + void write_copy(std::string_view path, const char* key); // Close the DB, delete the file, and then reopen it. This operation is *not* // implemented in a safe manner and will only work in fairly specific circumstances diff --git a/src/realm/query_engine.hpp b/src/realm/query_engine.hpp index 8b7ecf2d1e8..6a91c717cf5 100644 --- a/src/realm/query_engine.hpp +++ b/src/realm/query_engine.hpp @@ -91,6 +91,7 @@ TConditionValue: Type of values in condition column. That is, int64_t, float, #include #include #include +#include #include #include diff --git a/src/realm/sync/noinst/client_impl_base.hpp b/src/realm/sync/noinst/client_impl_base.hpp index d094ab53ced..8efd17c4084 100644 --- a/src/realm/sync/noinst/client_impl_base.hpp +++ b/src/realm/sync/noinst/client_impl_base.hpp @@ -26,6 +26,7 @@ #include #include #include +#include #include namespace realm::sync { diff --git a/src/realm/sync/noinst/server/server.cpp b/src/realm/sync/noinst/server/server.cpp index aeac91412f5..e317076d0d8 100644 --- a/src/realm/sync/noinst/server/server.cpp +++ b/src/realm/sync/noinst/server/server.cpp @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/src/realm/sync/tools/print_changeset.cpp b/src/realm/sync/tools/print_changeset.cpp index 3412bc0ec77..d69839ce57c 100644 --- a/src/realm/sync/tools/print_changeset.cpp +++ b/src/realm/sync/tools/print_changeset.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/src/realm/tokenizer.cpp b/src/realm/tokenizer.cpp index f6bc42604cc..401be2fc4c6 100644 --- a/src/realm/tokenizer.cpp +++ b/src/realm/tokenizer.cpp @@ -61,7 +61,7 @@ std::pair, std::set> Tokenizer::get_search_to } }; for (; m_cur_pos != m_end_pos; m_cur_pos++) { - if (isspace(*m_cur_pos)) { + if (isspace(static_cast(*m_cur_pos))) { add_token(); } else { diff --git a/src/realm/transaction.cpp 
b/src/realm/transaction.cpp index 6714a6c1b60..9e93875923f 100644 --- a/src/realm/transaction.cpp +++ b/src/realm/transaction.cpp @@ -171,7 +171,6 @@ Transaction::Transaction(DBRef _db, SlabAlloc* alloc, DB::ReadLockInfo& rli, DB: bool writable = stage == DB::transact_Writing; m_transact_stage = DB::transact_Ready; set_transact_stage(stage); - m_alloc.note_reader_start(this); attach_shared(m_read_lock.m_top_ref, m_read_lock.m_file_size, writable, VersionID{rli.m_version, rli.m_reader_idx}); if (db->m_logger) { @@ -827,7 +826,6 @@ void Transaction::do_end_read() noexcept } db->release_read_lock(m_read_lock); - m_alloc.note_reader_end(this); set_transact_stage(DB::transact_Ready); // reset the std::shared_ptr to allow the DB object to release resources // as early as possible. @@ -851,7 +849,6 @@ void Transaction::close_read_with_lock() m_oldest_version_not_persisted->m_file_size); db->do_release_read_lock(m_read_lock); - m_alloc.note_reader_end(this); set_transact_stage(DB::transact_Ready); // reset the std::shared_ptr to allow the DB object to release resources // as early as possible. 
diff --git a/src/realm/util/aes_cryptor.hpp b/src/realm/util/aes_cryptor.hpp index d9f8da87fc2..b4e4d852abf 100644 --- a/src/realm/util/aes_cryptor.hpp +++ b/src/realm/util/aes_cryptor.hpp @@ -19,28 +19,31 @@ #ifndef REALM_AES_CRYPTOR_HPP #define REALM_AES_CRYPTOR_HPP +#include +#include + #include #include -#include -#include #include +#include #include -#include -#include - namespace realm::util { class WriteObserver { public: virtual bool no_concurrent_writer_seen() = 0; - virtual ~WriteObserver() {} + +protected: + ~WriteObserver() = default; }; class WriteMarker { public: virtual void mark(uint64_t page_offset) = 0; virtual void unmark() = 0; - virtual ~WriteMarker() {} + +protected: + ~WriteMarker() = default; }; } // namespace realm::util @@ -60,25 +63,27 @@ class WriteMarker { namespace realm::util { -struct iv_table; +struct IVTable; class EncryptedFileMapping; -enum class IVRefreshState { UpToDate, RequiresRefresh }; - class AESCryptor { public: - AESCryptor(const uint8_t* key); + AESCryptor(const char* key); ~AESCryptor() noexcept; - void set_file_size(off_t new_size); + void set_data_size(File::SizeType new_size); - size_t read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObserver* observer = nullptr); - void try_read_block(FileDesc fd, off_t pos, char* dst) noexcept; - void write(FileDesc fd, off_t pos, const char* src, size_t size, WriteMarker* marker = nullptr) noexcept; - util::FlatMap refresh_ivs(FileDesc fd, off_t data_pos, size_t page_ndx_in_file_expected, - size_t end_page_ndx_in_file); + enum class ReadResult { Eof, Uninitialized, InterruptedFirstWrite, StaleHmac, Failed, Success }; + ReadResult read(FileDesc fd, File::SizeType pos, char* dst, WriteObserver* observer = nullptr); + void try_read_block(FileDesc fd, File::SizeType pos, char* dst) noexcept; + void write(FileDesc fd, File::SizeType pos, const char* src, WriteMarker* marker = nullptr) noexcept; + bool refresh_iv(FileDesc fd, size_t page_ndx); + void invalidate_ivs() noexcept; 
- void check_key(const uint8_t* key); + const char* get_key() const noexcept + { + return reinterpret_cast(m_key.data()); + } private: enum EncryptionMode { @@ -95,6 +100,7 @@ class AESCryptor { }; enum class IVLookupMode { UseCache, Refetch }; + using Hmac = std::array; #if REALM_PLATFORM_APPLE CCCryptorRef m_encr; @@ -105,36 +111,21 @@ class AESCryptor { EVP_CIPHER_CTX* m_ctx; #endif - std::array m_aesKey; - std::array m_hmacKey; - std::vector m_iv_buffer; + const std::array m_key; + std::vector m_iv_buffer; + std::vector m_iv_buffer_cache; + std::vector m_iv_blocks_read; std::unique_ptr m_rw_buffer; std::unique_ptr m_dst_buffer; - std::vector m_iv_buffer_cache; - bool check_hmac(const void* data, size_t len, const std::array& hmac) const; - void crypt(EncryptionMode mode, off_t pos, char* dst, const char* src, const char* stored_iv) noexcept; - iv_table& get_iv_table(FileDesc fd, off_t data_pos, IVLookupMode mode = IVLookupMode::UseCache) noexcept; + bool constant_time_equals(const Hmac&, const Hmac&) const; + void calculate_hmac(Hmac&) const; + void crypt(EncryptionMode mode, File::SizeType pos, char* dst, const char* src, const char* stored_iv) noexcept; + IVTable& get_iv_table(FileDesc fd, File::SizeType data_pos, IVLookupMode mode = IVLookupMode::UseCache) noexcept; void handle_error(); -}; - -struct ReaderInfo { - const void* reader_ID; - uint64_t version; -}; - -struct SharedFileInfo { - FileDesc fd; - AESCryptor cryptor; - std::vector mappings; - uint64_t last_scanned_version = 0; - uint64_t current_version = 0; - size_t num_decrypted_pages = 0; - size_t num_reclaimed_pages = 0; - size_t progress_index = 0; - std::vector readers; - - SharedFileInfo(const uint8_t* key); + void read_iv_block(FileDesc fd, File::SizeType data_pos); + ReadResult attempt_read(FileDesc fd, File::SizeType pos, char* dst, IVLookupMode iv_mode, uint32_t& iv, + Hmac& hmac); }; } // namespace realm::util diff --git a/src/realm/util/encrypted_file_mapping.cpp 
b/src/realm/util/encrypted_file_mapping.cpp index 8106b96336e..5e5ef6b00a0 100644 --- a/src/realm/util/encrypted_file_mapping.cpp +++ b/src/realm/util/encrypted_file_mapping.cpp @@ -18,6 +18,7 @@ #include +#include #include #include @@ -25,26 +26,23 @@ #if REALM_ENABLE_ENCRYPTION #include #include -#include #include #include +#include -#include #include +#include #include -#include +#include +#include +#include #include -#include #include #ifdef REALM_DEBUG #include #endif -#include -#include -#include - #if defined(_WIN32) #include #include @@ -52,128 +50,244 @@ #else #include #include -#include #endif namespace realm::util { -SharedFileInfo::SharedFileInfo(const uint8_t* key) - : cryptor(key) -{ -} - -// We have the following constraints here: +// When Realm's file encryption was originally designed, we had the constraint +// that all encryption and decryption had to happen in aligned system page size +// sized blocks due to the use of signal handlers to lazily decrypt data and +// track where writes occur. This is no longer the case, but may still help +// explain why the file layout looks the way it does. // -// 1. When writing, we only know which 4k page is dirty, and not what bytes -// within the page are dirty, so we always have to write in 4k blocks. -// 2. Pages being written need to be entirely within an 8k-aligned block to -// ensure that they're written to the hardware in atomic blocks. -// 3. We need to store the IV used for each 4k page somewhere, so that we can -// ensure that we never reuse an IV (and still be decryptable). +// Encryption is performed on 4096 byte data pages. Each group of 64 data pages +// is arranged into a "block", which has a 4096 byte header containing the IVs +// and HMACs for the following pages. Each page has *two* IVs and HMACs stored. +// iv2/hmac2 contain the values which were last used to successfully decrypt +// the page, while iv1/hmac1 are the values which were last used to encrypt the +// page.
// -// Because pages need to be aligned, we can't just prepend the IV to each page, -// or we'd have to double the size of the file (as the rest of the 4k block -// containing the IV would not be usable). Writing the IVs to a different part -// of the file from the data results in them not being in the same 8k block, and -// so it is possible that only the IV or only the data actually gets updated on -// disk. We deal with this by storing four pieces of data about each page: the -// hash of the encrypted data, the current IV, the hash of the previous encrypted -// data, and the previous IV. To write, we encrypt the data, hash the ciphertext, -// then write the new IV/ciphertext hash, fsync(), and then write the new -// ciphertext. This ensures that if an error occurs between writing the IV and -// the ciphertext, we can still determine that we should use the old IV, since -// the ciphertext's hash will match the old ciphertext. - -// This produces a file on disk with the following layout: -// 4k block of metadata (up to 64 iv_table instances stored here) -// 64 * 4k blocks of data (up to 262144 bytes of data are stored here) -// 4k block of metadata -// 64 * 4k blocks of data -// ... - -struct iv_table { +// Writing new encrypted data has the following steps: +// +// 1. Copy iv1/hmac1 to iv2/hmac2 in the IVTable +// 2. Increment iv1 +// 3. Encrypt the page in memory +// 4. Compute the hmac for the new encrypted data. +// 5. If the hmac matches the previous hmac, goto 2 (this will not ever actually happen) +// 6. Write the new IVTable for the page. +// 7. fsync() (or F_BARRIERFSYNC on Apple) +// 8. Write the new encrypted data +// +// If we are interrupted before #6, no i/o has happened and the data on disk is +// fine. If we are interrupted between #6 and #8, then when we next try to read +// the page the hmac check using hmac1 will fail, but the check using hmac2 +// will succeed and we will be able to read the old data. 
We then copy +// iv2/hmac2 back to the active fields and continue as normal. +// +// This scheme breaks if we have a partial write of the 4k page. This is +// impossible with SSDs, which can only write in their atomic block size, and +// it would be extremely unusual for that to be smaller than 4k. It may be a +// problem when running on HDDs, though. +// +// Reading from an encrypted file is done by creating a mapping and then +// calling `read_barrier(addr, size)` to mark the section of the mapping which +// needs to be populated. This decrypts each of the pages which cover that +// range and places the plaintext into memory. If any of the pages were already +// decrypted, this is a no-op that skips reading anything and just assumes that +// the data was up-to-date. +// +// Writing is done with `read_barrier(addr, size, true)` before performing any +// writes to mark the range as writeable, and then `write_barrier(addr, size)` +// to mark bytes which were actually written to. `write_barrier()` eagerly +// copies all of the written bytes to any other active mappings on the same +// file which have those pages decrypted in memory. This is spooky +// threading-wise, and is only made safe by Realm's MVCC semantics - if we're +// writing to a section of the file we know that no one can be legally reading +// those exact bytes, and we must be writing to different bytes in the same +// page. This copying makes it so that we never have to recheck the disk; once +// we have read and decrypted a page for a mapping, that page is forevermore +// valid and up-to-date. +// +// All dirty data is kept buffered in memory until `flush()` is called. +// +// In multi-process scenarios (or just multiple File instances for a single +// file in a single process, which doesn't happen when using the public API +// normally), eagerly keeping decrypted pages up to date is impossible, and we +// sometimes need to recheck the disk. 
Here we once again take advantage of +// Realm being MVCC with discrete points where we may need to see newer +// versions of the data on disk. When the reader view is updated, if there have +// been any external writes to the file SlabAlloc calls +// `mark_pages_for_iv_check()`, which puts all up-to-date pages into a +// potentially-stale state. The next time each page is accessed, we reread the +// IVTable for that page. If it's the same as the IVTable for the plaintext we +// have in memory then the page is marked as being up-to-date, and if it's +// different we reread the page. +// +// Another source of complexity in multiprocess scenarios is that while we +// assume that the actual i/o is atomic in 4k chunks, writing to the in-memory +// buffers is distinctly not atomic. One process reading from a memory mapping +// while another process is writing to that position in the file can see +// incomplete writes. Rather than doing page-level locking, we assume that this +// will be very rare and perform optimistic unlocked reads. If decryption fails +// and we are in a potentially-multiprocess scenario we retry the read several +// times before reporting an error. 
+ +struct IVTable { uint32_t iv1 = 0; std::array hmac1 = {}; uint32_t iv2 = 0; std::array hmac2 = {}; - bool operator==(const iv_table& other) const + bool operator==(const IVTable& other) const { return iv1 == other.iv1 && iv2 == other.iv2 && hmac1 == other.hmac1 && hmac2 == other.hmac2; } - bool operator!=(const iv_table& other) const + bool operator!=(const IVTable& other) const { return !(*this == other); } }; +// We read this via memcpy and need it to be packed +static_assert(sizeof(IVTable) == 64); namespace { -const int aes_block_size = 16; -const size_t block_size = 4096; - -const size_t metadata_size = sizeof(iv_table); -const size_t blocks_per_metadata_block = block_size / metadata_size; +constexpr uint8_t aes_block_size = 16; +constexpr uint16_t encryption_page_size = 4096; +constexpr uint8_t metadata_size = sizeof(IVTable); +constexpr uint8_t pages_per_block = encryption_page_size / metadata_size; static_assert(metadata_size == 64, "changing the size of the metadata breaks compatibility with existing Realm files"); +using SizeType = File::SizeType; + +template +To checked_cast(From from) +{ + To to; + if (REALM_UNLIKELY(int_cast_with_overflow_detect(from, to))) { + throw MaximumFileSizeExceeded(util::format("File size %1 is larger than can be represented", from)); + } + return to; +} + +// Overflows when converting from file positions (always 64-bits) to size_t +// (sometimes 32-bits) should all be caught by set_file_size() +template +constexpr To assert_cast(From from) +{ + REALM_ASSERT_DEBUG(!int_cast_has_overflow(from)); + return static_cast(from); +} + +// Index of page which contains `data_pos` +constexpr size_t page_index(SizeType data_pos) noexcept +{ + SizeType index = data_pos / encryption_page_size; + return assert_cast(index); +} + +// Number of pages required to store `size` bytes +constexpr size_t page_count(SizeType size) noexcept +{ + return assert_cast((size + encryption_page_size - 1) / encryption_page_size); +} + +// Index of the 
metadata block which contains `data_pos` +constexpr size_t block_index(SizeType data_pos) noexcept +{ + return page_index(data_pos) / pages_per_block; +} + +// Number of metadata blocks required to store `size` bytes +constexpr size_t block_count(SizeType data_size) noexcept +{ + return (page_count(data_size) + pages_per_block - 1) / pages_per_block; +} + // map an offset in the data to the actual location in the file -template -Int real_offset(Int pos) +SizeType data_pos_to_file_pos(SizeType data_pos) { - REALM_ASSERT(pos >= 0); - const size_t index = static_cast(pos) / block_size; - const size_t metadata_page_count = index / blocks_per_metadata_block + 1; - return Int(pos + metadata_page_count * block_size); + REALM_ASSERT(data_pos >= 0); + return data_pos + (block_index(data_pos) + 1) * encryption_page_size; } // map a location in the file to the offset in the data -template -Int fake_offset(Int pos) +SizeType file_pos_to_data_pos(SizeType file_pos) { - REALM_ASSERT(pos >= 0); - const size_t index = static_cast(pos) / block_size; - const size_t metadata_page_count = (index + blocks_per_metadata_block) / (blocks_per_metadata_block + 1); - return pos - metadata_page_count * block_size; + REALM_ASSERT(file_pos >= 0); + const size_t metadata_page_count = (page_index(file_pos) + pages_per_block) / (pages_per_block + 1); + return file_pos - metadata_page_count * encryption_page_size; } -// get the location of the iv_table for the given data (not file) position -off_t iv_table_pos(off_t pos) +// get the location of the IVTable for the given data (not file) position +SizeType iv_table_pos(SizeType data_pos) { - REALM_ASSERT(pos >= 0); - const size_t index = static_cast(pos) / block_size; - const size_t metadata_block = index / blocks_per_metadata_block; - const size_t metadata_index = index & (blocks_per_metadata_block - 1); - return off_t(metadata_block * (blocks_per_metadata_block + 1) * block_size + metadata_index * metadata_size); + REALM_ASSERT(data_pos >= 0); + 
const size_t index = page_index(data_pos); + const size_t metadata_block = block_index(data_pos); + const size_t metadata_index = index & (pages_per_block - 1); + return metadata_block * (pages_per_block + 1) * encryption_page_size + metadata_index * metadata_size; } -void check_write(FileDesc fd, off_t pos, const void* data, size_t len) +// get the file location of the IVTable block for the given data (not file) position +SizeType iv_table_block_pos(SizeType data_pos) { - uint64_t orig = File::get_file_pos(fd); - File::seek_static(fd, pos); - File::write_static(fd, static_cast(data), len); - File::seek_static(fd, orig); + REALM_ASSERT(data_pos >= 0); + return block_index(data_pos) * (pages_per_block + 1) * encryption_page_size; } -size_t check_read(FileDesc fd, off_t pos, void* dst, size_t len) +constexpr size_t iv_table_size(SizeType data_pos) { - uint64_t orig = File::get_file_pos(fd); - File::seek_static(fd, pos); - size_t ret = File::read_static(fd, static_cast(dst), len); - File::seek_static(fd, orig); - return ret; + return block_count(data_pos) * pages_per_block; } -} // anonymous namespace +// not actually checked any more +size_t check_read(FileDesc fd, SizeType pos, void* dst) +{ + return File::read_static(fd, pos, static_cast(dst), encryption_page_size); +} // first block is iv data, second page is data -static_assert(c_min_encrypted_file_size == 2 * block_size, +static_assert(c_min_encrypted_file_size == 2 * encryption_page_size, "chaging the block size breaks encrypted file portability"); -AESCryptor::AESCryptor(const uint8_t* key) - : m_rw_buffer(new char[block_size]) - , m_dst_buffer(new char[block_size]) +template +constexpr std::array to_array_impl(const T* ptr, std::index_sequence) +{ + return {{ptr[I]...}}; +} +template +constexpr auto to_array(const T* ptr) +{ + return to_array_impl(ptr, std::make_index_sequence{}); +} + +void memcpy_if_changed(void* dst, const void* src, size_t n) { - memcpy(m_aesKey.data(), key, 32); - 
memcpy(m_hmacKey.data(), key + 32, 32); +#if REALM_SANITIZE_THREAD + // Because our copying is page-level granularity, we have some benign races + // where the byte ranges in each page that weren't modified get overwritten + // with the same values as they already had. TSan correctly reports this as + // a data race, so when using TSan do (much slower) byte-level checking for + // modifications and only write the ones which changed. Unlike suppressing + // the warning entirely, this will still produce tsan errors if we actually + // change any bytes that another thread is reading. + auto dst_2 = static_cast(dst); + auto src_2 = static_cast(src); + for (size_t i = 0; i < n; ++i) { + if (dst_2[i] != src_2[i]) + dst_2[i] = src_2[i]; + } +#else + memcpy(dst, src, n); +#endif +} +} // anonymous namespace + +AESCryptor::AESCryptor(const char* key) + : m_key(to_array(reinterpret_cast(key))) + , m_rw_buffer(new char[encryption_page_size]) + , m_dst_buffer(new char[encryption_page_size]) +{ #if REALM_PLATFORM_APPLE // A random iv is passed to CCCryptorReset. This iv is *not used* by Realm; we set it manually prior to // each call to BCryptEncrypt() and BCryptDecrypt(). 
We pass this random iv as an attempt to @@ -214,278 +328,232 @@ AESCryptor::~AESCryptor() noexcept #endif } -void AESCryptor::check_key(const uint8_t* key) -{ - if (memcmp(m_aesKey.data(), key, 32) != 0 || memcmp(m_hmacKey.data(), key + 32, 32) != 0) - throw DecryptionFailed(); -} - void AESCryptor::handle_error() { throw std::runtime_error("Error occurred in encryption layer"); } -void AESCryptor::set_file_size(off_t new_size) +void AESCryptor::set_data_size(SizeType new_data_size) { - REALM_ASSERT(new_size >= 0 && !int_cast_has_overflow(new_size)); - size_t new_size_casted = size_t(new_size); - size_t block_count = (new_size_casted + block_size - 1) / block_size; - m_iv_buffer.reserve((block_count + blocks_per_metadata_block - 1) & ~(blocks_per_metadata_block - 1)); + REALM_ASSERT(new_data_size >= 0); + m_iv_buffer.reserve(iv_table_size(new_data_size)); m_iv_buffer_cache.reserve(m_iv_buffer.capacity()); + m_iv_blocks_read.resize(m_iv_buffer.capacity() / 64); } -iv_table& AESCryptor::get_iv_table(FileDesc fd, off_t data_pos, IVLookupMode mode) noexcept +IVTable& AESCryptor::get_iv_table(FileDesc fd, SizeType data_pos, IVLookupMode mode) noexcept { - REALM_ASSERT(!int_cast_has_overflow(data_pos)); - size_t data_pos_casted = size_t(data_pos); - size_t idx = data_pos_casted / block_size; - if (mode == IVLookupMode::UseCache && idx < m_iv_buffer.size()) - return m_iv_buffer[idx]; - - size_t block_start = std::min(m_iv_buffer.size(), (idx / blocks_per_metadata_block) * blocks_per_metadata_block); - size_t block_end = 1 + idx / blocks_per_metadata_block; - REALM_ASSERT(block_end * blocks_per_metadata_block <= m_iv_buffer.capacity()); // not safe to allocate here - if (block_end * blocks_per_metadata_block > m_iv_buffer.size()) { - m_iv_buffer.resize(block_end * blocks_per_metadata_block); - m_iv_buffer_cache.resize(m_iv_buffer.size()); + size_t idx = page_index(data_pos); + REALM_ASSERT(idx < m_iv_buffer.capacity()); // required space should have been preallocated + if 
(mode != IVLookupMode::UseCache || idx >= m_iv_buffer.size() || !m_iv_blocks_read[block_index(data_pos)]) { + read_iv_block(fd, data_pos); } + m_iv_buffer_cache[idx] = m_iv_buffer[idx]; + return m_iv_buffer[idx]; +} - for (size_t i = block_start; i < block_end * blocks_per_metadata_block; i += blocks_per_metadata_block) { - off_t iv_pos = iv_table_pos(off_t(i * block_size)); - size_t bytes = check_read(fd, iv_pos, &m_iv_buffer[i], block_size); - if (bytes < block_size) - break; // rest is zero-filled by resize() +// We always read an entire block of IVTables at a time rather than just the +// one we need as it's likely to take about the same amount of time up front +// and greatly reduce the total number of read calls we have to make +void AESCryptor::read_iv_block(FileDesc fd, SizeType data_pos) +{ + size_t idx = block_index(data_pos) * pages_per_block; + if (idx + pages_per_block > m_iv_buffer.size()) { + m_iv_buffer.resize(idx + pages_per_block); + m_iv_buffer_cache.resize(m_iv_buffer.size()); } - - return m_iv_buffer[idx]; + SizeType iv_pos = iv_table_block_pos(data_pos); + check_read(fd, iv_pos, &m_iv_buffer[idx]); + m_iv_blocks_read[block_index(data_pos)] = true; } -bool AESCryptor::check_hmac(const void* src, size_t len, const std::array& hmac) const +void AESCryptor::calculate_hmac(Hmac& hmac) const { - std::array buffer; - hmac_sha224(Span(reinterpret_cast(src), len), buffer, m_hmacKey); + hmac_sha224(Span(reinterpret_cast(m_rw_buffer.get()), encryption_page_size), hmac, + Span(m_key).sub_span<32>()); +} +bool AESCryptor::constant_time_equals(const Hmac& a, const Hmac& b) const +{ // Constant-time memcmp to avoid timing attacks uint8_t result = 0; for (size_t i = 0; i < 224 / 8; ++i) - result |= buffer[i] ^ hmac[i]; + result |= a[i] ^ b[i]; return result == 0; } -util::FlatMap -AESCryptor::refresh_ivs(FileDesc fd, off_t data_pos, size_t page_ndx_in_file_expected, size_t end_page_ndx_in_file) +bool AESCryptor::refresh_iv(FileDesc fd, size_t page_ndx) { - 
REALM_ASSERT_EX(page_ndx_in_file_expected < end_page_ndx_in_file, page_ndx_in_file_expected, - end_page_ndx_in_file); - // the indices returned are page indices, not block indices - util::FlatMap page_states; - - REALM_ASSERT(!int_cast_has_overflow(data_pos)); - size_t data_pos_casted = size_t(data_pos); - // the call to get_iv_table() below reads in all ivs in a chunk with size = blocks_per_metadata_block - // so we will know if any iv in this chunk has changed - const size_t block_ndx_refresh_start = - ((data_pos_casted / block_size) / blocks_per_metadata_block) * blocks_per_metadata_block; - const size_t block_ndx_refresh_end = block_ndx_refresh_start + blocks_per_metadata_block; - REALM_ASSERT_EX(block_ndx_refresh_end <= m_iv_buffer.size(), block_ndx_refresh_start, block_ndx_refresh_end, - m_iv_buffer.size()); - - get_iv_table(fd, data_pos, IVLookupMode::Refetch); - - size_t number_of_identical_blocks = 0; - size_t last_page_index = -1; - constexpr iv_table uninitialized_iv = {}; - // there may be multiple iv blocks per page so all must be unchanged for a page - // to be considered unchanged. If any one of the ivs has changed then the entire page - // must be refreshed. Eg. with a page_size() of 16k and block_size of 4k, if any of - // the 4 ivs in that page are different, the entire page must be refreshed. 
- const size_t num_required_identical_blocks_for_page_match = page_size() / block_size; - for (size_t block_ndx = block_ndx_refresh_start; block_ndx < block_ndx_refresh_end; ++block_ndx) { - size_t page_index = block_ndx * block_size / page_size(); - if (page_index >= end_page_ndx_in_file) { - break; - } - if (page_index != last_page_index) { - number_of_identical_blocks = 0; - } - if (m_iv_buffer_cache[block_ndx] != m_iv_buffer[block_ndx] || m_iv_buffer[block_ndx] == uninitialized_iv) { - page_states[page_index] = IVRefreshState::RequiresRefresh; - m_iv_buffer_cache[block_ndx] = m_iv_buffer[block_ndx]; - } - else { - ++number_of_identical_blocks; - } - if (number_of_identical_blocks >= num_required_identical_blocks_for_page_match) { - REALM_ASSERT_EX(page_states.count(page_index) == 0, page_index, page_ndx_in_file_expected); - page_states[page_index] = IVRefreshState::UpToDate; - } - last_page_index = page_index; + REALM_ASSERT(page_ndx < m_iv_buffer.capacity()); + if (page_ndx >= m_iv_buffer.size() || !m_iv_blocks_read[page_ndx / pages_per_block]) { + read_iv_block(fd, SizeType(page_ndx) * encryption_page_size); } - REALM_ASSERT_EX(page_states.count(page_ndx_in_file_expected) == 1, page_states.size(), page_ndx_in_file_expected, - block_ndx_refresh_start, blocks_per_metadata_block); - return page_states; + + if (m_iv_buffer[page_ndx] != m_iv_buffer_cache[page_ndx]) { + m_iv_buffer_cache[page_ndx] = m_iv_buffer[page_ndx]; + return true; + } + return false; } -size_t AESCryptor::read(FileDesc fd, off_t pos, char* dst, size_t size, WriteObserver* observer) +void AESCryptor::invalidate_ivs() noexcept { - REALM_ASSERT_EX(size % block_size == 0, size, block_size); - // We need to throw DecryptionFailed if the key is incorrect or there has been a corruption in the data but - // not in a reader starvation scenario where a different process is writing pages and ivs faster than we can read - // them. 
We also want to optimize for a single process writer since in that case all the cached ivs are correct. - // To do this, we first attempt to use the cached IV, and if it is invalid, read from disk again. During reader - // starvation, the just read IV could already be out of date with the data page, so continue trying to read until - // a match is found (for up to 5 seconds before giving up entirely). + m_iv_blocks_read.assign(m_iv_blocks_read.size(), false); +} + +AESCryptor::ReadResult AESCryptor::read(FileDesc fd, SizeType pos, char* dst, WriteObserver* observer) +{ + uint32_t iv = 0; + Hmac hmac{}; + // We're in a single-process scenario (or other processes are only reading), + // so we can trust our in-memory caches and never need to retry + if (!observer || observer->no_concurrent_writer_seen()) { + return attempt_read(fd, pos, dst, IVLookupMode::UseCache, iv, hmac); + } + + // There's another process which might be trying to write to the file while + // we're reading from it, which means that we might see invalid data due to + // data races. When this happens we need to retry the read, and only throw + // an error if the data either hasn't changed after the timeout has expired + // or if we're in a reader starvation scenario where the writer is producing + // new data faster than we can consume it. size_t retry_count = 0; - std::pair last_iv_and_data_hash; + std::pair last_iv_and_data_hash; auto retry_start_time = std::chrono::steady_clock::now(); size_t num_identical_reads = 1; - auto retry = [&](std::string_view page_data, const iv_table& iv, const char* debug_from) { + ReadResult result = ReadResult::Success; + while (retry_count <= 5 || (retry_count - num_identical_reads > 1 && retry_count < 20)) { + result = + attempt_read(fd, pos, dst, retry_count == 0 ? 
IVLookupMode::UseCache : IVLookupMode::Refetch, iv, hmac); + switch (result) { + case ReadResult::Success: + case ReadResult::Eof: + case ReadResult::Uninitialized: + // Consistent and valid states that may or may not actually have data + return result; + case ReadResult::InterruptedFirstWrite: + case ReadResult::StaleHmac: + case ReadResult::Failed: + // Inconsistent states which may change if we retry + break; + } + + // Check if we've timed out, but always retry at least once in case + // we got suspended while another process was writing or something constexpr auto max_retry_period = std::chrono::seconds(5); auto elapsed = std::chrono::steady_clock::now() - retry_start_time; - bool we_are_alone = true; - // not having an observer set means that we're alone. (or should mean it) - if (observer) { - we_are_alone = observer->no_concurrent_writer_seen(); - } - if (we_are_alone || (retry_count > 0 && elapsed > max_retry_period)) { - auto str = util::format("unable to decrypt after %1 seconds (retry_count=%2, from=%3, size=%4)", - std::chrono::duration_cast(elapsed).count(), retry_count, - debug_from, size); + if (retry_count > 0 && elapsed > max_retry_period) { + auto str = util::format("unable to decrypt after %1 seconds (retry_count=%2)", + std::chrono::duration_cast(elapsed).count(), retry_count); // std::cerr << std::endl << "*Timeout: " << str << std::endl; throw DecryptionFailed(str); } - else { - // don't wait on the first retry as we want to optimize the case where the first read - // from the iv table cache didn't validate and we are fetching the iv block from disk for the first time - auto cur_iv_and_data_hash = std::make_pair(iv, std::hash{}(page_data)); - if (retry_count != 0) { - if (last_iv_and_data_hash == cur_iv_and_data_hash) { - ++num_identical_reads; - } - // don't retry right away if there are potentially other external writers - std::this_thread::yield(); - } - last_iv_and_data_hash = cur_iv_and_data_hash; - ++retry_count; - } - }; - - auto 
should_retry = [&]() -> bool { - // if we don't have an observer object, we're guaranteed to be alone in the world, - // and retrying will not help us, since the file is not being changed. - if (!observer) - return false; - // if no-one is mutating the file, retrying will also not help: - if (observer && observer->no_concurrent_writer_seen()) - return false; - // if we do not observe identical data or iv within several sequential reads then - // this is a multiprocess reader starvation scenario so keep trying until we get a match - return retry_count <= 5 || (retry_count - num_identical_reads > 1 && retry_count < 20); - }; - - size_t bytes_read = 0; - while (bytes_read < size) { - ssize_t actual = check_read(fd, real_offset(pos), m_rw_buffer.get(), block_size); - - if (actual == 0) - return bytes_read; - - iv_table& iv = get_iv_table(fd, pos, retry_count == 0 ? IVLookupMode::UseCache : IVLookupMode::Refetch); - if (iv.iv1 == 0) { - if (should_retry()) { - retry(std::string_view{m_rw_buffer.get(), block_size}, iv, "iv1 == 0"); - continue; + + // don't wait on the first retry as we want to optimize the case where the first read + // from the iv table cache didn't validate and we are fetching the iv block from disk for the first time + std::pair cur_iv_and_data_hash(iv, hmac); + if (retry_count != 0) { + if (last_iv_and_data_hash == cur_iv_and_data_hash) { + ++num_identical_reads; } - // This block has never been written to, so we've just read pre-allocated - // space. No memset() since the code using this doesn't rely on - // pre-allocated space being zeroed. 
- return bytes_read; + // don't retry right away if there are potentially other external writers + std::this_thread::yield(); } + last_iv_and_data_hash = cur_iv_and_data_hash; + ++retry_count; - if (!check_hmac(m_rw_buffer.get(), actual, iv.hmac1)) { - // Either the DB is corrupted or we were interrupted between writing the - // new IV and writing the data - if (iv.iv2 == 0) { - if (should_retry()) { - retry(std::string_view{m_rw_buffer.get(), block_size}, iv, "iv2 == 0"); - continue; - } - // Very first write was interrupted - return bytes_read; - } + if (observer->no_concurrent_writer_seen()) + break; + } - if (check_hmac(m_rw_buffer.get(), actual, iv.hmac2)) { - // Un-bump the IV since the write with the bumped IV never actually - // happened - memcpy(&iv.iv1, &iv.iv2, 32); - } - else { - // If the file has been shrunk and then re-expanded, we may have - // old hmacs that don't go with this data. ftruncate() is - // required to fill any added space with zeroes, so assume that's - // what happened if the buffer is all zeroes - ssize_t i; - for (i = 0; i < actual; ++i) { - if (m_rw_buffer[i] != 0) { - break; - } - } - if (i != actual) { - // at least one byte wasn't zero - retry(std::string_view{m_rw_buffer.get(), block_size}, iv, "i != bytes_read"); - continue; - } - return bytes_read; - } + return result; +} + +AESCryptor::ReadResult AESCryptor::attempt_read(FileDesc fd, SizeType pos, char* dst, IVLookupMode iv_mode, + uint32_t& iv_out, Hmac& hmac) +{ + IVTable& iv = get_iv_table(fd, pos, iv_mode); + iv_out = iv.iv1; + if (iv.iv1 == 0) { + std::fill(hmac.begin(), hmac.end(), 0); + return ReadResult::Uninitialized; + } + + size_t actual = check_read(fd, data_pos_to_file_pos(pos), m_rw_buffer.get()); + if (actual == 0) { + return ReadResult::Eof; + } + + calculate_hmac(hmac); + if (!constant_time_equals(hmac, iv.hmac1)) { + // Either the DB is corrupted or we were interrupted between writing the + // new IV and writing the data + if (iv.iv2 == 0) { + return 
ReadResult::InterruptedFirstWrite; } - // We may expect some adress ranges of the destination buffer of - // AESCryptor::read() to stay unmodified, i.e. being overwritten with - // the same bytes as already present, and may have read-access to these - // from other threads while decryption is taking place. - // - // However, some implementations of AES_cbc_encrypt(), in particular - // OpenSSL, will put garbled bytes as an intermediate step during the - // operation which will lead to incorrect data being read by other - // readers concurrently accessing that page. Incorrect data leads to - // crashes. - // - // We therefore decrypt to a temporary buffer first and then copy the - // completely decrypted data after. - crypt(mode_Decrypt, pos, m_dst_buffer.get(), m_rw_buffer.get(), reinterpret_cast(&iv.iv1)); - memcpy(dst, m_dst_buffer.get(), block_size); - - pos += block_size; - dst += block_size; - bytes_read += block_size; - retry_count = 0; + if (constant_time_equals(hmac, iv.hmac2)) { + // Un-bump the IV since the write with the bumped IV never actually + // happened + memcpy(&iv.iv1, &iv.iv2, 32); + } + else { + // If the file has been shrunk and then re-expanded, we may have + // old hmacs that don't go with this data. ftruncate() is + // required to fill any added space with zeroes, so assume that's + // what happened if the buffer is all zeroes + bool all_zero = std::all_of(&m_rw_buffer[0], &m_rw_buffer[actual], [](char c) { + return c == 0; + }); + if (all_zero) + return ReadResult::StaleHmac; + return ReadResult::Failed; + } } - return bytes_read; + + // We may expect some address ranges of the destination buffer of + // AESCryptor::read() to stay unmodified, i.e. being overwritten with + // the same bytes as already present, and may have read-access to these + // from other threads while decryption is taking place. 
+ // + // However, some implementations of AES_cbc_encrypt(), in particular + // OpenSSL, will put garbled bytes as an intermediate step during the + // operation which will lead to incorrect data being read by other + // readers concurrently accessing that page. Incorrect data leads to + // crashes. + // + // We therefore decrypt to a temporary buffer first and then copy the + // completely decrypted data after. + crypt(mode_Decrypt, pos, m_dst_buffer.get(), m_rw_buffer.get(), reinterpret_cast(&iv.iv1)); + memcpy_if_changed(dst, m_dst_buffer.get(), encryption_page_size); + return ReadResult::Success; } -void AESCryptor::try_read_block(FileDesc fd, off_t pos, char* dst) noexcept +void AESCryptor::try_read_block(FileDesc fd, SizeType pos, char* dst) noexcept { - ssize_t bytes_read = check_read(fd, real_offset(pos), m_rw_buffer.get(), block_size); + size_t bytes_read = check_read(fd, data_pos_to_file_pos(pos), m_rw_buffer.get()); if (bytes_read == 0) { std::cerr << "Read failed: 0x" << std::hex << pos << std::endl; - memset(dst, 0x55, block_size); + memset(dst, 0x55, encryption_page_size); return; } - iv_table& iv = get_iv_table(fd, pos, IVLookupMode::Refetch); + IVTable& iv = get_iv_table(fd, pos, IVLookupMode::Refetch); if (iv.iv1 == 0) { std::cerr << "Block never written: 0x" << std::hex << pos << std::endl; - memset(dst, 0xAA, block_size); + memset(dst, 0xAA, encryption_page_size); return; } - if (!check_hmac(m_rw_buffer.get(), bytes_read, iv.hmac1)) { + Hmac hmac; + calculate_hmac(hmac); + if (!constant_time_equals(hmac, iv.hmac1)) { if (iv.iv2 == 0) { std::cerr << "First write interrupted: 0x" << std::hex << pos << std::endl; } - if (check_hmac(m_rw_buffer.get(), bytes_read, iv.hmac2)) { + if (constant_time_equals(hmac, iv.hmac2)) { std::cerr << "Restore old IV: 0x" << std::hex << pos << std::endl; memcpy(&iv.iv1, &iv.iv2, 32); } @@ -496,40 +564,38 @@ void AESCryptor::try_read_block(FileDesc fd, off_t pos, char* dst) noexcept crypt(mode_Decrypt, pos, dst, 
m_rw_buffer.get(), reinterpret_cast(&iv.iv1)); } -void AESCryptor::write(FileDesc fd, off_t pos, const char* src, size_t size, WriteMarker* marker) noexcept +void AESCryptor::write(FileDesc fd, SizeType pos, const char* src, WriteMarker* marker) noexcept { - REALM_ASSERT(size % block_size == 0); - while (size > 0) { - iv_table& iv = get_iv_table(fd, pos); + IVTable& iv = get_iv_table(fd, pos); - memcpy(&iv.iv2, &iv.iv1, 32); // this is also copying the hmac - do { + memcpy(&iv.iv2, &iv.iv1, 32); // this is also copying the hmac + do { + ++iv.iv1; + // 0 is reserved for never-been-used, so bump if we just wrapped around + if (iv.iv1 == 0) ++iv.iv1; - // 0 is reserved for never-been-used, so bump if we just wrapped around - if (iv.iv1 == 0) - ++iv.iv1; - - crypt(mode_Encrypt, pos, m_rw_buffer.get(), src, reinterpret_cast(&iv.iv1)); - hmac_sha224(Span(reinterpret_cast(m_rw_buffer.get()), block_size), iv.hmac1, m_hmacKey); - // In the extremely unlikely case that both the old and new versions have - // the same hash we won't know which IV to use, so bump the IV until - // they're different. - } while (REALM_UNLIKELY(iv.hmac1 == iv.hmac2)); - - if (marker) - marker->mark(pos); - check_write(fd, iv_table_pos(pos), &iv, sizeof(iv)); - check_write(fd, real_offset(pos), m_rw_buffer.get(), block_size); - if (marker) - marker->unmark(); - - pos += block_size; - src += block_size; - size -= block_size; - } + + crypt(mode_Encrypt, pos, m_rw_buffer.get(), src, reinterpret_cast(&iv.iv1)); + hmac_sha224(Span(reinterpret_cast(m_rw_buffer.get()), encryption_page_size), iv.hmac1, + Span(m_key).sub_span<32>()); + // In the extremely unlikely case that both the old and new versions have + // the same hash we won't know which IV to use, so bump the IV until + // they're different. + } while (REALM_UNLIKELY(iv.hmac1 == iv.hmac2)); + + if (marker) + marker->mark(pos); + File::write_static(fd, iv_table_pos(pos), reinterpret_cast(&iv), sizeof(iv)); + // FIXME: doesn't this need a barrier? 
The IV table is very likely to + // make it to disk first due to being issued first and being earlier in + // the file, but not guaranteed + File::write_static(fd, data_pos_to_file_pos(pos), m_rw_buffer.get(), encryption_page_size); + if (marker) + marker->unmark(); + m_iv_buffer_cache[page_index(pos)] = iv; } -void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* src, const char* stored_iv) noexcept +void AESCryptor::crypt(EncryptionMode mode, SizeType pos, char* dst, const char* src, const char* stored_iv) noexcept { uint8_t iv[aes_block_size] = {0}; memcpy(iv, stored_iv, 4); @@ -540,31 +606,32 @@ void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* sr CCCryptorReset(cryptor, iv); size_t bytesEncrypted = 0; - CCCryptorStatus err = CCCryptorUpdate(cryptor, src, block_size, dst, block_size, &bytesEncrypted); + CCCryptorStatus err = + CCCryptorUpdate(cryptor, src, encryption_page_size, dst, encryption_page_size, &bytesEncrypted); REALM_ASSERT(err == kCCSuccess); - REALM_ASSERT(bytesEncrypted == block_size); + REALM_ASSERT(bytesEncrypted == encryption_page_size); #elif defined(_WIN32) ULONG cbData; int i; if (mode == mode_Encrypt) { - i = BCryptEncrypt(m_aes_key_handle, (PUCHAR)src, block_size, nullptr, (PUCHAR)iv, sizeof(iv), (PUCHAR)dst, - block_size, &cbData, 0); + i = BCryptEncrypt(m_aes_key_handle, (PUCHAR)src, encryption_page_size, nullptr, (PUCHAR)iv, sizeof(iv), + (PUCHAR)dst, encryption_page_size, &cbData, 0); REALM_ASSERT_RELEASE_EX(i == 0 && "BCryptEncrypt()", i); - REALM_ASSERT_RELEASE_EX(cbData == block_size && "BCryptEncrypt()", cbData); + REALM_ASSERT_RELEASE_EX(cbData == encryption_page_size && "BCryptEncrypt()", cbData); } else if (mode == mode_Decrypt) { - i = BCryptDecrypt(m_aes_key_handle, (PUCHAR)src, block_size, nullptr, (PUCHAR)iv, sizeof(iv), (PUCHAR)dst, - block_size, &cbData, 0); + i = BCryptDecrypt(m_aes_key_handle, (PUCHAR)src, encryption_page_size, nullptr, (PUCHAR)iv, sizeof(iv), + 
(PUCHAR)dst, encryption_page_size, &cbData, 0); REALM_ASSERT_RELEASE_EX(i == 0 && "BCryptDecrypt()", i); - REALM_ASSERT_RELEASE_EX(cbData == block_size && "BCryptDecrypt()", cbData); + REALM_ASSERT_RELEASE_EX(cbData == encryption_page_size && "BCryptDecrypt()", cbData); } else { REALM_UNREACHABLE(); } #else - if (!EVP_CipherInit_ex(m_ctx, EVP_aes_256_cbc(), NULL, m_aesKey.data(), iv, mode)) + if (!EVP_CipherInit_ex(m_ctx, EVP_aes_256_cbc(), NULL, m_key.data(), iv, mode)) handle_error(); int len; @@ -572,7 +639,7 @@ void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* sr EVP_CIPHER_CTX_set_padding(m_ctx, 0); if (!EVP_CipherUpdate(m_ctx, reinterpret_cast(dst), &len, reinterpret_cast(src), - block_size)) + encryption_page_size)) handle_error(); // Finalize the encryption. Should not output further data. @@ -581,374 +648,324 @@ void AESCryptor::crypt(EncryptionMode mode, off_t pos, char* dst, const char* sr #endif } -EncryptedFileMapping::EncryptedFileMapping(SharedFileInfo& file, size_t file_offset, void* addr, size_t size, +EncryptedFile::EncryptedFile(const char* key, FileDesc fd) + : fd(fd) + , cryptor(key) +{ +} + +std::unique_ptr EncryptedFile::add_mapping(SizeType file_offset, void* addr, size_t size, + File::AccessMode access) +{ + auto mapping = std::make_unique(*this, file_offset, addr, size, access); + CheckedLockGuard lock(mutex); + mappings.push_back(mapping.get()); + return mapping; +} + +EncryptedFileMapping::EncryptedFileMapping(EncryptedFile& file, SizeType file_offset, void* addr, size_t size, File::AccessMode access, util::WriteObserver* observer, util::WriteMarker* marker) : m_file(file) - , m_page_shift(log2(realm::util::page_size())) - , m_blocks_per_page(static_cast(1ULL << m_page_shift) / block_size) - , m_num_decrypted(0) , m_access(access) , m_observer(observer) , m_marker(marker) #ifdef REALM_DEBUG - , m_validate_buffer(new char[static_cast(1ULL << m_page_shift)]) + , m_validate_buffer(new char[encryption_page_size]) 
#endif { - REALM_ASSERT(m_blocks_per_page * block_size == static_cast(1ULL << m_page_shift)); set(addr, size, file_offset); // throws - file.mappings.push_back(this); } EncryptedFileMapping::~EncryptedFileMapping() { + CheckedLockGuard lock(m_file.mutex); for (auto& e : m_page_state) { REALM_ASSERT(is_not(e, Writable)); } if (m_access == File::access_ReadWrite) { - flush(); - sync(); + do_flush(); + } + + auto it = std::find(m_file.mappings.begin(), m_file.mappings.end(), this); + REALM_ASSERT(it != m_file.mappings.end()); + if (it != m_file.mappings.end()) { + m_file.mappings.erase(it); } - m_file.mappings.erase(remove(m_file.mappings.begin(), m_file.mappings.end(), this)); } -char* EncryptedFileMapping::page_addr(size_t local_page_ndx) const noexcept +// offset within page, not within file +uint16_t EncryptedFileMapping::get_offset_of_address(const void* addr) const noexcept { - REALM_ASSERT_EX(local_page_ndx < m_page_state.size(), local_page_ndx, m_page_state.size()); - return static_cast(m_addr) + (local_page_ndx << m_page_shift); + return reinterpret_cast(addr) & (encryption_page_size - 1); } -void EncryptedFileMapping::mark_outdated(size_t local_page_ndx) noexcept +size_t EncryptedFileMapping::get_local_index_of_address(const void* addr, size_t offset) const noexcept { - if (local_page_ndx >= m_page_state.size()) - return; - REALM_ASSERT(is_not(m_page_state[local_page_ndx], UpToDate)); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Dirty)); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Writable)); + REALM_ASSERT_EX(addr >= m_addr, addr, m_addr); + return (reinterpret_cast(addr) - reinterpret_cast(m_addr) + offset) / encryption_page_size; +} + +bool EncryptedFileMapping::contains_page(size_t block_in_file) const noexcept +{ + return block_in_file - m_first_page < m_page_state.size(); +} + +char* EncryptedFileMapping::page_addr(size_t local_ndx) const noexcept +{ + REALM_ASSERT_DEBUG(local_ndx < m_page_state.size()); + return static_cast(m_addr) + 
(local_ndx * encryption_page_size); +} - size_t chunk_ndx = local_page_ndx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; +SizeType EncryptedFileMapping::page_pos(size_t local_ndx) const noexcept +{ + return SizeType(local_ndx + m_first_page) * encryption_page_size; } -bool EncryptedFileMapping::copy_up_to_date_page(size_t local_page_ndx) noexcept +// If we have multiple mappings for the same part of the file, one of them may +// already contain the page we're about to read and if so we can skip reading +// it and instead just memcpy it. +bool EncryptedFileMapping::copy_up_to_date_page(size_t local_ndx) noexcept { - REALM_ASSERT_EX(local_page_ndx < m_page_state.size(), local_page_ndx, m_page_state.size()); + REALM_ASSERT_EX(local_ndx < m_page_state.size(), local_ndx, m_page_state.size()); // Precondition: this method must never be called for a page which // is already up to date. - REALM_ASSERT(is_not(m_page_state[local_page_ndx], UpToDate)); - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - size_t page_ndx_in_file = local_page_ndx + m_first_page; - if (m == this || !m->contains_page(page_ndx_in_file)) + REALM_ASSERT(is_not(m_page_state[local_ndx], UpToDate)); + size_t ndx_in_file = local_ndx + m_first_page; + for (auto& m : m_file.mappings) { + m->assert_locked(); + if (m == this || !m->contains_page(ndx_in_file)) continue; - size_t shadow_mapping_local_ndx = page_ndx_in_file - m->m_first_page; - if (is(m->m_page_state[shadow_mapping_local_ndx], UpToDate)) { - memcpy(page_addr(local_page_ndx), m->page_addr(shadow_mapping_local_ndx), - static_cast(1ULL << m_page_shift)); - return true; - } + size_t other_mapping_ndx = ndx_in_file - m->m_first_page; + auto other_state = m->m_page_state[other_mapping_ndx]; + if (is(other_state, Writable) || is_not(other_state, UpToDate)) + continue; + + memcpy_if_changed(page_addr(local_ndx), m->page_addr(other_mapping_ndx), 
encryption_page_size); + set(m_page_state[local_ndx], UpToDate); + clear(m_page_state[local_ndx], StaleIV); + return true; } return false; } -void EncryptedFileMapping::refresh_page(size_t local_page_ndx, size_t required) +// Whenever we advance our reader view of the file we mark all previously +// up-to-date pages as being possibly stale. On the next access of the page we +// then check if the IV for that page has changed to determine if the page has +// actually changed or if we can just mark it as being up-to-date again. +bool EncryptedFileMapping::check_possibly_stale_page(size_t local_ndx) noexcept { - REALM_ASSERT_EX(local_page_ndx < m_page_state.size(), local_page_ndx, m_page_state.size()); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Dirty)); - REALM_ASSERT(is_not(m_page_state[local_page_ndx], Writable)); - char* addr = page_addr(local_page_ndx); - - if (!copy_up_to_date_page(local_page_ndx)) { - const size_t page_ndx_in_file = local_page_ndx + m_first_page; - const size_t end_page_ndx_in_file = m_first_page + m_page_state.size(); - off_t data_pos = off_t(page_ndx_in_file << m_page_shift); - if (is(m_page_state[local_page_ndx], StaleIV)) { - auto refreshed_ivs = - m_file.cryptor.refresh_ivs(m_file.fd, data_pos, page_ndx_in_file, end_page_ndx_in_file); - for (const auto& [page_ndx, state] : refreshed_ivs) { - size_t local_page_ndx_of_iv_change = page_ndx - m_first_page; - REALM_ASSERT_EX(contains_page(page_ndx), page_ndx, m_first_page, m_page_state.size()); - if (is(m_page_state[local_page_ndx_of_iv_change], Dirty | Writable)) { - continue; - } - switch (state) { - case IVRefreshState::UpToDate: - if (is(m_page_state[local_page_ndx_of_iv_change], StaleIV)) { - set(m_page_state[local_page_ndx_of_iv_change], UpToDate); - clear(m_page_state[local_page_ndx_of_iv_change], StaleIV); - } - break; - case IVRefreshState::RequiresRefresh: - clear(m_page_state[local_page_ndx_of_iv_change], StaleIV); - clear(m_page_state[local_page_ndx_of_iv_change], UpToDate); 
- break; - } - } - REALM_ASSERT_EX(refreshed_ivs.count(page_ndx_in_file) == 1, page_ndx_in_file, refreshed_ivs.size()); - if (refreshed_ivs[page_ndx_in_file] == IVRefreshState::UpToDate) { - return; - } - } - size_t size = static_cast(1ULL << m_page_shift); - size_t actual = m_file.cryptor.read(m_file.fd, data_pos, addr, size, m_observer); - if (actual < size) { - if (actual >= required) { - memset(addr + actual, 0x55, size - actual); - } - else { - size_t fs = to_size_t(File::get_size_static(m_file.fd)); - throw DecryptionFailed( - util::format("failed to decrypt block %1 in file of size %2", local_page_ndx + m_first_page, fs)); - } + if (is_not(m_page_state[local_ndx], StaleIV)) + return false; + size_t ndx_in_file = local_ndx + m_first_page; + bool did_change = m_file.cryptor.refresh_iv(m_file.fd, ndx_in_file); + // Update the page state in all mappings and not just the current one because + // refresh_iv() only returns true once per page per write. Deferring this + // until copy_up_to_date_page() almost works, but this mapping could be + // removed before the other mapping copies the page. 
+ for (auto& m : m_file.mappings) { + m->assert_locked(); + if (!m->contains_page(ndx_in_file)) + continue; + auto& state = m->m_page_state[ndx_in_file - m->m_first_page]; + if (is(state, StaleIV)) { + REALM_ASSERT(is_not(state, UpToDate)); + clear(state, StaleIV); + if (!did_change) + set(state, UpToDate); } } - if (is_not(m_page_state[local_page_ndx], UpToDate)) - m_num_decrypted++; - set(m_page_state[local_page_ndx], UpToDate); - clear(m_page_state[local_page_ndx], StaleIV); + return !did_change; } -void EncryptedFileMapping::mark_pages_for_IV_check() +REALM_NORETURN +REALM_COLD +void EncryptedFileMapping::throw_decryption_error(size_t local_ndx, std::string_view msg) { - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - for (size_t pg = m->get_start_index(); pg < m->get_end_index(); ++pg) { - size_t local_page_ndx = pg - m->m_first_page; - if (is(m->m_page_state[local_page_ndx], UpToDate) && - is_not(m->m_page_state[local_page_ndx], Dirty | Writable)) { - REALM_ASSERT(is_not(m->m_page_state[local_page_ndx], StaleIV)); - clear(m->m_page_state[local_page_ndx], UpToDate); - set(m->m_page_state[local_page_ndx], StaleIV); - } + size_t fs = to_size_t(File::get_size_static(m_file.fd)); + throw DecryptionFailed(util::format("page %1 in file of size %2 %3", local_ndx + m_first_page, fs, msg)); +} + +void EncryptedFileMapping::refresh_page(size_t local_ndx, bool to_modify) +{ + REALM_ASSERT_EX(local_ndx < m_page_state.size(), local_ndx, m_page_state.size()); + REALM_ASSERT(is_not(m_page_state[local_ndx], Dirty)); + REALM_ASSERT(is_not(m_page_state[local_ndx], Writable)); + if (copy_up_to_date_page(local_ndx) || check_possibly_stale_page(local_ndx)) { + return; + } + + char* addr = page_addr(local_ndx); + switch (m_file.cryptor.read(m_file.fd, page_pos(local_ndx), addr, m_observer)) { + case AESCryptor::ReadResult::Eof: + if (!to_modify) + throw_decryption_error(local_ndx, "is out of bounds"); + break; + case 
AESCryptor::ReadResult::Uninitialized: + if (!to_modify) + throw_decryption_error(local_ndx, "has never been written to"); + break; + case AESCryptor::ReadResult::InterruptedFirstWrite: + if (!to_modify) + throw_decryption_error(local_ndx, "has never been successfully written to, but a write was begun"); + break; + case AESCryptor::ReadResult::StaleHmac: + break; + case AESCryptor::ReadResult::Failed: + throw_decryption_error( + local_ndx, "failed the HMAC check. Either the encryption key is incorrect or data is corrupted"); + case AESCryptor::ReadResult::Success: + break; + } + set(m_page_state[local_ndx], UpToDate); +} + +void EncryptedFile::mark_data_as_possibly_stale() +{ + + util::CheckedLockGuard lock(mutex); + cryptor.invalidate_ivs(); + for (auto& m : mappings) { + m->assert_locked(); + m->mark_pages_for_iv_check(); + } +} + +void EncryptedFileMapping::mark_pages_for_iv_check() +{ + for (auto& state : m_page_state) { + if (is(state, UpToDate) && is_not(state, Dirty | Writable)) { + REALM_ASSERT(is_not(state, StaleIV)); + clear(state, UpToDate); + set(state, StaleIV); } } } -void EncryptedFileMapping::write_and_update_all(size_t local_page_ndx, size_t begin_offset, - size_t end_offset) noexcept +void EncryptedFileMapping::write_and_update_all(size_t local_ndx, uint16_t offset, uint16_t size) noexcept { - REALM_ASSERT(is(m_page_state[local_page_ndx], Writable)); - REALM_ASSERT(is(m_page_state[local_page_ndx], UpToDate)); + REALM_ASSERT(is(m_page_state[local_ndx], Writable)); + REALM_ASSERT(is(m_page_state[local_ndx], UpToDate)); + REALM_ASSERT(is_not(m_page_state[local_ndx], StaleIV)); + REALM_ASSERT(offset + size <= encryption_page_size); // Go through all other mappings of this file and copy changes into those mappings - size_t page_ndx_in_file = local_page_ndx + m_first_page; - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - if (m != this && m->contains_page(page_ndx_in_file)) { - size_t 
shadow_local_page_ndx = page_ndx_in_file - m->m_first_page; - if (is(m->m_page_state[shadow_local_page_ndx], UpToDate) || - is(m->m_page_state[shadow_local_page_ndx], StaleIV)) { // only keep up to data pages up to date - memcpy(m->page_addr(shadow_local_page_ndx) + begin_offset, page_addr(local_page_ndx) + begin_offset, - end_offset - begin_offset); - if (is(m->m_page_state[shadow_local_page_ndx], StaleIV)) { - set(m->m_page_state[shadow_local_page_ndx], UpToDate); - clear(m->m_page_state[shadow_local_page_ndx], StaleIV); - } - } - else { - m->mark_outdated(shadow_local_page_ndx); - } + size_t ndx_in_file = local_ndx + m_first_page; + for (auto& m : m_file.mappings) { + m->assert_locked(); + if (m == this || !m->contains_page(ndx_in_file)) + continue; + + size_t other_local_ndx = ndx_in_file - m->m_first_page; + auto& state = m->m_page_state[other_local_ndx]; + if (is(state, UpToDate)) { + memcpy_if_changed(m->page_addr(other_local_ndx) + offset, page_addr(local_ndx) + offset, size); + } + // If the target page is possibly stale then we need to copy the entire + // page and not just the bytes we just touched as other parts of the + // page may be out of date + else if (is(state, StaleIV)) { + memcpy_if_changed(m->page_addr(other_local_ndx), page_addr(local_ndx), encryption_page_size); + set(state, UpToDate); + clear(state, StaleIV); } } - set(m_page_state[local_page_ndx], Dirty); - clear(m_page_state[local_page_ndx], Writable); - clear(m_page_state[local_page_ndx], StaleIV); - size_t chunk_ndx = local_page_ndx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; + set(m_page_state[local_ndx], Dirty); + clear(m_page_state[local_ndx], Writable); } - -void EncryptedFileMapping::validate_page(size_t local_page_ndx) noexcept +void EncryptedFileMapping::validate_page(size_t local_ndx) noexcept { #ifdef REALM_DEBUG - REALM_ASSERT(local_page_ndx < m_page_state.size()); - if (is_not(m_page_state[local_page_ndx], UpToDate)) + 
REALM_ASSERT(local_ndx < m_page_state.size()); + if (is_not(m_page_state[local_ndx], UpToDate)) return; - const size_t page_ndx_in_file = local_page_ndx + m_first_page; - if (!m_file.cryptor.read(m_file.fd, off_t(page_ndx_in_file << m_page_shift), m_validate_buffer.get(), - static_cast(1ULL << m_page_shift), m_observer)) - return; + switch (m_file.cryptor.read(m_file.fd, page_pos(local_ndx), m_validate_buffer.get(), m_observer)) { + case AESCryptor::ReadResult::Eof: + case AESCryptor::ReadResult::Uninitialized: + case AESCryptor::ReadResult::InterruptedFirstWrite: + case AESCryptor::ReadResult::StaleHmac: + return; + case AESCryptor::ReadResult::Failed: + abort(); + case AESCryptor::ReadResult::Success: + break; + } - for (size_t i = 0; i < m_file.mappings.size(); ++i) { - EncryptedFileMapping* m = m_file.mappings[i]; - size_t shadow_mapping_local_ndx = page_ndx_in_file - m->m_first_page; - if (m != this && m->contains_page(page_ndx_in_file) && is(m->m_page_state[shadow_mapping_local_ndx], Dirty)) { - memcpy(m_validate_buffer.get(), m->page_addr(shadow_mapping_local_ndx), - static_cast(1ULL << m_page_shift)); + const size_t ndx_in_file = local_ndx + m_first_page; + for (auto& m : m_file.mappings) { + m->assert_locked(); + size_t other_local_ndx = ndx_in_file - m->m_first_page; + if (m != this && m->contains_page(ndx_in_file) && is(m->m_page_state[other_local_ndx], Dirty)) { + memcpy(m_validate_buffer.get(), m->page_addr(other_local_ndx), encryption_page_size); break; } } - if (memcmp(m_validate_buffer.get(), page_addr(local_page_ndx), static_cast(1ULL << m_page_shift))) { - std::cerr << "mismatch " << this << ": fd(" << m_file.fd << ")" - << "page(" << local_page_ndx << "/" << m_page_state.size() << ") " << m_validate_buffer.get() << " " - << page_addr(local_page_ndx) << std::endl; + if (memcmp(m_validate_buffer.get(), page_addr(local_ndx), encryption_page_size) != 0) { + util::format(std::cerr, "mismatch %1: fd(%2) page(%3/%4) %5 %6\n", this, m_file.fd, local_ndx, 
+ m_page_state.size(), m_validate_buffer.get(), page_addr(local_ndx)); REALM_TERMINATE(""); } #else - static_cast(local_page_ndx); + static_cast(local_ndx); #endif } void EncryptedFileMapping::validate() noexcept { #ifdef REALM_DEBUG - const size_t num_local_pages = m_page_state.size(); - for (size_t local_page_ndx = 0; local_page_ndx < num_local_pages; ++local_page_ndx) - validate_page(local_page_ndx); -#endif -} - -void EncryptedFileMapping::reclaim_page(size_t page_ndx) -{ -#ifdef _WIN32 - // On windows we don't know how to replace a page within a page range with a fresh one. - // instead we clear it. If the system runs with same-page-merging, this will reduce - // the number of used pages. - memset(page_addr(page_ndx), 0, static_cast(1) << m_page_shift); -#else - // On Posix compatible, we can request a new page in the middle of an already - // requested range, so that's what we do. This releases the backing store for the - // old page and gives us a shared zero-page that we can later demand-allocate, thus - // reducing the overall amount of used physical pages. - void* addr = page_addr(page_ndx); - void* addr2 = ::mmap(addr, 1 << m_page_shift, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); - if (addr != addr2) { - if (addr2 == 0) { - int err = errno; - throw SystemError(err, get_errno_msg("using mmap() to clear page failed", err)); - } - throw std::runtime_error("internal error in mmap()"); - } + for (size_t i = 0; i < m_page_state.size(); ++i) + validate_page(i); #endif } -/* This functions is a bit convoluted. It reclaims pages, but only does a limited amount of work - * each time it's called. It saves the progress in a 'progress_ptr' so that it can resume later - * from where it was stopped. 
- * - * The workload is composed of workunits, each unit signifying - * 1) A scanning of the state of 4K pages - * 2) One system call (to mmap to release a page and get a new one) - * 3) A scanning of 1K entries in the "don't scan" array (corresponding to 4M pages) - * Approximately - */ -void EncryptedFileMapping::reclaim_untouched(size_t& progress_index, size_t& work_limit) noexcept +void EncryptedFileMapping::do_flush(bool skip_validate) noexcept { - const auto scan_amount_per_workunit = 4096; - bool contiguous_scan = false; - size_t next_scan_payment = scan_amount_per_workunit; - const size_t last_index = get_end_index(); - - auto done_some_work = [&]() { - if (work_limit > 0) - work_limit--; - }; - - auto visit_and_potentially_reclaim = [&](size_t page_ndx) { - PageState& ps = m_page_state[page_ndx]; - if (is(ps, UpToDate)) { - if (is_not(ps, Touched) && is_not(ps, Dirty) && is_not(ps, Writable)) { - clear(ps, UpToDate); - reclaim_page(page_ndx); - m_num_decrypted--; - done_some_work(); + for (size_t i = 0; i < m_page_state.size(); ++i) { + if (is_not(m_page_state[i], Dirty)) { + if (!skip_validate) { + validate_page(i); } - contiguous_scan = false; + continue; } - clear(ps, Touched); - }; + m_file.cryptor.write(m_file.fd, page_pos(i), page_addr(i), m_marker); + clear(m_page_state[i], Dirty); + } - auto skip_chunk_if_possible = [&](size_t& page_ndx) // update vars corresponding to skipping a chunk if possible - { - size_t chunk_ndx = page_ndx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) { - // skip to end of chunk - page_ndx = ((chunk_ndx + 1) << page_to_chunk_shift) - 1; - progress_index = m_first_page + page_ndx; - // postpone next scan payment - next_scan_payment += page_to_chunk_factor; - return true; - } - else - return false; - }; - - auto is_last_page_in_chunk = [](size_t page_ndx) { - auto page_to_chunk_mask = page_to_chunk_factor - 1; - return (page_ndx & page_to_chunk_mask) == page_to_chunk_mask; - }; - auto is_first_page_in_chunk = 
[](size_t page_ndx) { - auto page_to_chunk_mask = page_to_chunk_factor - 1; - return (page_ndx & page_to_chunk_mask) == 0; - }; - - while (work_limit > 0 && progress_index < last_index) { - size_t page_ndx = progress_index - m_first_page; - if (!skip_chunk_if_possible(page_ndx)) { - if (is_first_page_in_chunk(page_ndx)) { - contiguous_scan = true; - } - visit_and_potentially_reclaim(page_ndx); - // if we've scanned a full chunk contiguously, mark it as not needing scans - if (is_last_page_in_chunk(page_ndx)) { - if (contiguous_scan) { - m_chunk_dont_scan[page_ndx >> page_to_chunk_shift] = 1; - } - contiguous_scan = false; - } - } - // account for work performed: - if (page_ndx >= next_scan_payment) { - next_scan_payment = page_ndx + scan_amount_per_workunit; - done_some_work(); - } - ++progress_index; + // some of the tests call flush() on very small writes which results in + // validating on every flush being unreasonably slow + if (!skip_validate) { + validate(); } - return; } -void EncryptedFileMapping::flush() noexcept +void EncryptedFileMapping::flush(bool skip_validate) noexcept { - const size_t num_dirty_pages = m_page_state.size(); - for (size_t local_page_ndx = 0; local_page_ndx < num_dirty_pages; ++local_page_ndx) { - if (is_not(m_page_state[local_page_ndx], Dirty)) { - validate_page(local_page_ndx); - continue; - } - - size_t page_ndx_in_file = local_page_ndx + m_first_page; - m_file.cryptor.write(m_file.fd, off_t(page_ndx_in_file << m_page_shift), page_addr(local_page_ndx), - static_cast(1ULL << m_page_shift), m_marker); - clear(m_page_state[local_page_ndx], Dirty); - } + util::CheckedLockGuard lock(m_file.mutex); + do_flush(skip_validate); +} - validate(); +void EncryptedFileMapping::sync() noexcept +{ + util::CheckedLockGuard lock(m_file.mutex); + do_sync(); } #ifdef _MSC_VER #pragma warning(disable : 4297) // throw in noexcept #endif -void EncryptedFileMapping::sync() noexcept +void EncryptedFileMapping::do_sync() noexcept { + do_flush(); + #ifdef 
_WIN32 if (FlushFileBuffers(m_file.fd)) return; throw std::system_error(GetLastError(), std::system_category(), "FlushFileBuffers() failed"); #else fsync(m_file.fd); - // FIXME: on iOS/OSX fsync may not be enough to ensure crash safety. - // Consider adding fcntl(F_FULLFSYNC). This most likely also applies to msync. - // - // See description of fsync on iOS here: - // https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fsync.2.html - // - // See also - // https://developer.apple.com/library/ios/documentation/Cocoa/Conceptual/CoreData/Articles/cdPersistentStores.html - // for a discussion of this related to core data. #endif } #ifdef _MSC_VER @@ -957,134 +974,80 @@ void EncryptedFileMapping::sync() noexcept void EncryptedFileMapping::write_barrier(const void* addr, size_t size) noexcept { - // Propagate changes to all other decrypted pages mapping the same memory - + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT(size > 0); REALM_ASSERT(m_access == File::access_ReadWrite); - size_t first_accessed_local_page = get_local_index_of_address(addr); - size_t first_offset = static_cast(addr) - page_addr(first_accessed_local_page); - const char* last_accessed_address = static_cast(addr) + (size == 0 ? 
0 : size - 1); - size_t last_accessed_local_page = get_local_index_of_address(last_accessed_address); - size_t pages_size = m_page_state.size(); - - // propagate changes to first page (update may be partial, may also be to last page) - if (first_accessed_local_page < pages_size) { - REALM_ASSERT_EX(is(m_page_state[first_accessed_local_page], UpToDate), - m_page_state[first_accessed_local_page]); - if (first_accessed_local_page == last_accessed_local_page) { - size_t last_offset = last_accessed_address - page_addr(first_accessed_local_page); - write_and_update_all(first_accessed_local_page, first_offset, last_offset + 1); - } - else - write_and_update_all(first_accessed_local_page, first_offset, static_cast(1) << m_page_shift); - } - // propagate changes to pages between first and last page (update only full pages) - for (size_t idx = first_accessed_local_page + 1; idx < last_accessed_local_page && idx < pages_size; ++idx) { - REALM_ASSERT(is(m_page_state[idx], UpToDate)); - write_and_update_all(idx, 0, static_cast(1) << m_page_shift); - } - // propagate changes to the last page (update may be partial) - if (first_accessed_local_page < last_accessed_local_page && last_accessed_local_page < pages_size) { - REALM_ASSERT(is(m_page_state[last_accessed_local_page], UpToDate)); - size_t last_offset = last_accessed_address - page_addr(last_accessed_local_page); - write_and_update_all(last_accessed_local_page, 0, last_offset + 1); - } -} -void EncryptedFileMapping::read_barrier(const void* addr, size_t size, Header_to_size header_to_size, bool to_modify) -{ - size_t first_accessed_local_page = get_local_index_of_address(addr); - size_t page_size = 1ULL << m_page_shift; - size_t required = get_offset_of_address(addr) + size; - { - // make sure the first page is available - PageState& ps = m_page_state[first_accessed_local_page]; - if (is_not(ps, Touched)) - set(ps, Touched); - if (is_not(ps, UpToDate)) - refresh_page(first_accessed_local_page, to_modify ? 
0 : required); - if (to_modify) - set(ps, Writable); - } + size_t local_ndx = get_local_index_of_address(addr); + auto offset_in_page = uint16_t(static_cast(addr) - page_addr(local_ndx)); + size += offset_in_page; - // force the page reclaimer to look into pages in this chunk: - size_t chunk_ndx = first_accessed_local_page >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; - - if (header_to_size) { - // We know it's an array, and array headers are 8-byte aligned, so it is - // included in the first page which was handled above. - size = header_to_size(static_cast(addr)); - required = get_offset_of_address(addr) + size; + // Propagate changes to all other decrypted pages mapping the same memory + while (size > 0) { + REALM_ASSERT(local_ndx < m_page_state.size()); + REALM_ASSERT(is(m_page_state[local_ndx], PageState::Writable)); + auto bytes_in_page = uint16_t(std::min(encryption_page_size, size) - offset_in_page); + write_and_update_all(local_ndx, offset_in_page, bytes_in_page); + size -= offset_in_page + bytes_in_page; + offset_in_page = 0; + ++local_ndx; } +} - size_t last_idx = get_local_index_of_address(addr, size == 0 ? 0 : size - 1); - size_t pages_size = m_page_state.size(); - - // We already checked first_accessed_local_page above, so we start the loop - // at first_accessed_local_page + 1 to check the following page. 
- for (size_t idx = first_accessed_local_page + 1; idx <= last_idx && idx < pages_size; ++idx) { - required -= page_size; - // force the page reclaimer to look into pages in this chunk - chunk_ndx = idx >> page_to_chunk_shift; - if (m_chunk_dont_scan[chunk_ndx]) - m_chunk_dont_scan[chunk_ndx] = 0; - - PageState& ps = m_page_state[idx]; - if (is_not(ps, Touched)) - set(ps, Touched); +void EncryptedFileMapping::read_barrier(const void* addr, size_t size, bool to_modify) +{ + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT(size > 0); + size_t begin = get_local_index_of_address(addr); + size_t end = get_local_index_of_address(addr, size - 1); + for (size_t local_ndx = begin; local_ndx <= end; ++local_ndx) { + PageState& ps = m_page_state[local_ndx]; if (is_not(ps, UpToDate)) - refresh_page(idx, to_modify ? 0 : required); + refresh_page(local_ndx, to_modify); if (to_modify) set(ps, Writable); } } -void EncryptedFileMapping::extend_to(size_t offset, size_t new_size) +void EncryptedFileMapping::extend_to(SizeType offset, size_t new_size) { - REALM_ASSERT_EX(new_size % page_size() == 0, new_size, page_size()); - size_t num_pages = new_size >> m_page_shift; - m_page_state.resize(num_pages, PageState::Clean); - m_chunk_dont_scan.resize((num_pages + page_to_chunk_factor - 1) >> page_to_chunk_shift, false); - m_file.cryptor.set_file_size((off_t)(offset + new_size)); + CheckedLockGuard lock(m_file.mutex); + REALM_ASSERT_EX(new_size % encryption_page_size == 0, new_size, encryption_page_size); + m_page_state.resize(page_count(new_size), PageState::Clean); + m_file.cryptor.set_data_size(offset + SizeType(new_size)); } -void EncryptedFileMapping::set(void* new_addr, size_t new_size, size_t new_file_offset) +void EncryptedFileMapping::set(void* new_addr, size_t new_size, SizeType new_file_offset) { - REALM_ASSERT(new_file_offset % (1ULL << m_page_shift) == 0); - REALM_ASSERT(new_size % (1ULL << m_page_shift) == 0); + CheckedLockGuard lock(m_file.mutex); + 
REALM_ASSERT(new_file_offset % encryption_page_size == 0); + REALM_ASSERT(new_size % encryption_page_size == 0); // This seems dangerous - correct operation in a setting with multiple (partial) // mappings of the same file would rely on ordering of individual mapping requests. // Currently we only ever extend the file - but when we implement continuous defrag, // this design should be revisited. - m_file.cryptor.set_file_size(off_t(new_size + new_file_offset)); + m_file.cryptor.set_data_size(new_file_offset + SizeType(new_size)); - flush(); + do_flush(); m_addr = new_addr; - m_first_page = new_file_offset >> m_page_shift; - size_t num_pages = new_size >> m_page_shift; - - m_num_decrypted = 0; + // set_data_size() would have thrown if this cast would overflow + m_first_page = size_t(new_file_offset / encryption_page_size); m_page_state.clear(); - m_chunk_dont_scan.clear(); - - m_page_state.resize(num_pages, PageState(0)); - m_chunk_dont_scan.resize((num_pages + page_to_chunk_factor - 1) >> page_to_chunk_shift, false); + m_page_state.resize(new_size / encryption_page_size, PageState::Clean); } -File::SizeType encrypted_size_to_data_size(File::SizeType size) noexcept +SizeType encrypted_size_to_data_size(SizeType size) noexcept { - if (size == 0) - return 0; - return fake_offset(size); + return size == 0 ? 0 : file_pos_to_data_pos(size); } -File::SizeType data_size_to_encrypted_size(File::SizeType size) noexcept +SizeType data_size_to_encrypted_size(SizeType size) noexcept { - size_t ps = page_size(); - return real_offset((size + ps - 1) & ~(ps - 1)); + SizeType r = size % encryption_page_size; + size += r ? 
encryption_page_size - r : 0; + return data_pos_to_file_pos(size); } } // namespace realm::util #else diff --git a/src/realm/util/encrypted_file_mapping.hpp b/src/realm/util/encrypted_file_mapping.hpp index 54b056e70a8..d92672950da 100644 --- a/src/realm/util/encrypted_file_mapping.hpp +++ b/src/realm/util/encrypted_file_mapping.hpp @@ -19,190 +19,166 @@ #ifndef REALM_UTIL_ENCRYPTED_FILE_MAPPING_HPP #define REALM_UTIL_ENCRYPTED_FILE_MAPPING_HPP -#include -#include #include +#include +#include + +#include + +namespace realm::util { #if REALM_ENABLE_ENCRYPTION -typedef size_t (*Header_to_size)(const char* addr); +class EncryptedFileMapping; -#include +class EncryptedFile { +public: + EncryptedFile(const char* key, FileDesc fd); -namespace realm::util { + std::unique_ptr add_mapping(File::SizeType file_offset, void* addr, size_t size, + File::AccessMode access) REQUIRES(!mutex); + + const char* get_key() const noexcept REQUIRES(!mutex) + { + // It's safe to return a pointer into cryptor outside the lock because + // the key doesn't actually change and doesn't need to be guarded by + // the mutex at all. 
+ util::CheckedLockGuard lock(mutex); + return cryptor.get_key(); + } -struct SharedFileInfo; + void mark_data_as_possibly_stale() REQUIRES(!mutex); + +private: + friend class EncryptedFileMapping; + + CheckedMutex mutex; + FileDesc fd; + AESCryptor cryptor GUARDED_BY(mutex); + std::vector mappings GUARDED_BY(mutex); +}; class EncryptedFileMapping { public: - // Adds the newly-created object to file.mappings iff it's successfully constructed - EncryptedFileMapping(SharedFileInfo& file, size_t file_offset, void* addr, size_t size, File::AccessMode access, - util::WriteObserver* observer = nullptr, util::WriteMarker* marker = nullptr); + EncryptedFileMapping(EncryptedFile& file, File::SizeType file_offset, void* addr, size_t size, + File::AccessMode access, util::WriteObserver* observer = nullptr, + util::WriteMarker* marker = nullptr); ~EncryptedFileMapping(); // Default implementations of copy/assign can trigger multiple destructions EncryptedFileMapping(const EncryptedFileMapping&) = delete; EncryptedFileMapping& operator=(const EncryptedFileMapping&) = delete; - // Encrypt all dirty pages, push them to shared cache and mark them read-only + // Encrypt all dirty blocks, push them to shared cache and mark them read-only // Does not call fsync - void flush() noexcept; + void flush(bool skip_validate) noexcept REQUIRES(!m_file.mutex); - // Sync the image of this file in shared cache to disk. Does not imply flush. - void sync() noexcept; + // Flush and then sync the image of this file in shared cache to disk. + void sync() noexcept REQUIRES(!m_file.mutex); // Make sure that memory in the specified range is synchronized with any // changes made globally visible through call to write_barrier or refresh_outdated_pages(). 
// Optionally mark the pages for later modification - void read_barrier(const void* addr, size_t size, Header_to_size header_to_size, bool to_modify); + void read_barrier(const void* addr, size_t size, bool to_modify) REQUIRES(!m_file.mutex); // Ensures that any changes made to memory in the specified range // becomes visible to any later calls to read_barrier() // Pages selected must have been marked for modification at an earlier read barrier - void write_barrier(const void* addr, size_t size) noexcept; - - // Mark pages for later checks of the ivs on disk. If the IVs have changed compared to - // the in memory versions the page will later need to be refreshed. - // This is the process by which a reader in a multiprocess scenario detects if its - // mapping should be refreshed while advancing versions. - // The pages marked for IV-checks will be refetched and re-decrypted by later calls to read_barrier. - void mark_pages_for_IV_check(); + void write_barrier(const void* addr, size_t size) noexcept REQUIRES(!m_file.mutex); // Set this mapping to a new address and size // Flushes any remaining dirty pages from the old mapping - void set(void* new_addr, size_t new_size, size_t new_file_offset); + void set(void* new_addr, size_t new_size, File::SizeType new_file_offset) REQUIRES(!m_file.mutex); // Extend the size of this mapping. Memory holding decrypted pages must // have been allocated earlier - void extend_to(size_t offset, size_t new_size); + void extend_to(File::SizeType offset, size_t new_size) REQUIRES(!m_file.mutex); - size_t collect_decryption_count() - { - return m_num_decrypted; - } - // reclaim any untouched pages - this is thread safe with respect to - // concurrent access/touching of pages - but must be called with the mutex locked. 
- void reclaim_untouched(size_t& progress_ptr, size_t& accumulated_savings) noexcept; + bool contains_page(size_t block_in_file) const noexcept REQUIRES(m_file.mutex); + size_t get_local_index_of_address(const void* addr, size_t offset = 0) const noexcept REQUIRES(m_file.mutex); + uint16_t get_offset_of_address(const void* addr) const noexcept REQUIRES(m_file.mutex); - bool contains_page(size_t page_in_file) const; - size_t get_local_index_of_address(const void* addr, size_t offset = 0) const; - size_t get_offset_of_address(const void* addr) const; - - size_t get_end_index() - { - return m_first_page + m_page_state.size(); - } - size_t get_start_index() - { - return m_first_page; - } - void set_marker(WriteMarker* marker) + void set_marker(WriteMarker* marker) noexcept { m_marker = marker; } - void set_observer(WriteObserver* observer) + void set_observer(WriteObserver* observer) noexcept { m_observer = observer; } -#if REALM_DEBUG - std::string print_debug(); -#endif // REALM_DEBUG + std::string print_debug() REQUIRES(!m_file.mutex); private: - SharedFileInfo& m_file; + friend class EncryptedFile; - size_t m_page_shift; - size_t m_blocks_per_page; + EncryptedFile& m_file; + void* m_addr GUARDED_BY(m_file.mutex) = nullptr; + size_t m_first_page GUARDED_BY(m_file.mutex); - void* m_addr = nullptr; - - size_t m_first_page; - size_t m_num_decrypted; // 1 for every page decrypted - - enum PageState { + enum PageState : uint8_t { Clean = 0, - Touched = 1, // a ref->ptr translation has taken place - UpToDate = 2, // the page is fully up to date - StaleIV = 4, // the page needs to check the on disk IV for changes by other processes - Writable = 8, // the page is open for writing - Dirty = 16 // the page has been modified with respect to what's on file. 
+ UpToDate = 1, // the page is fully up to date + StaleIV = 2, // the page needs to check the on disk IV for changes by other processes + Writable = 4, // the page is open for writing + Dirty = 8 // the page has been modified with respect to what's on file. }; - std::vector m_page_state; + std::vector m_page_state GUARDED_BY(m_file.mutex); // little helpers: - inline void clear(PageState& ps, int p) + static constexpr void clear(PageState& ps, int p) { ps = PageState(ps & ~p); } - inline bool is_not(PageState& ps, int p) + static constexpr bool is_not(PageState& ps, int p) { return (ps & p) == 0; } - inline bool is(PageState& ps, int p) + static constexpr bool is(PageState& ps, int p) { return (ps & p) != 0; } - inline void set(PageState& ps, int p) + static constexpr void set(PageState& ps, int p) { ps = PageState(ps | p); } - // 1K pages form a chunk - this array allows us to skip entire chunks during scanning - std::vector m_chunk_dont_scan; - static constexpr int page_to_chunk_shift = 10; - static constexpr size_t page_to_chunk_factor = size_t(1) << page_to_chunk_shift; - File::AccessMode m_access; + const File::AccessMode m_access; util::WriteObserver* m_observer = nullptr; util::WriteMarker* m_marker = nullptr; #ifdef REALM_DEBUG - std::unique_ptr m_validate_buffer; + std::unique_ptr m_validate_buffer GUARDED_BY(m_file.mutex); #endif - char* page_addr(size_t local_page_ndx) const noexcept; - - void mark_outdated(size_t local_page_ndx) noexcept; - bool copy_up_to_date_page(size_t local_page_ndx) noexcept; - void refresh_page(size_t local_page_ndx, size_t required); - void write_and_update_all(size_t local_page_ndx, size_t begin_offset, size_t end_offset) noexcept; - void reclaim_page(size_t page_ndx); - void validate_page(size_t local_page_ndx) noexcept; - void validate() noexcept; -}; - -inline size_t EncryptedFileMapping::get_offset_of_address(const void* addr) const -{ - REALM_ASSERT_3(reinterpret_cast(addr), >=, reinterpret_cast(m_addr)); - return 
(reinterpret_cast(addr) - reinterpret_cast(m_addr)) & ((1ULL << m_page_shift) - 1); -} - -inline size_t EncryptedFileMapping::get_local_index_of_address(const void* addr, size_t offset) const -{ - REALM_ASSERT_EX(addr >= m_addr, size_t(addr), size_t(m_addr)); + char* page_addr(size_t local_ndx) const noexcept REQUIRES(m_file.mutex); + File::SizeType page_pos(size_t local_ndx) const noexcept REQUIRES(m_file.mutex); + bool copy_up_to_date_page(size_t local_ndx) noexcept REQUIRES(m_file.mutex); + bool check_possibly_stale_page(size_t local_ndx) noexcept REQUIRES(m_file.mutex); + void refresh_page(size_t local_ndx, bool to_modify) REQUIRES(m_file.mutex); + void write_and_update_all(size_t local_ndx, uint16_t offset, uint16_t size) noexcept REQUIRES(m_file.mutex); + void validate_page(size_t local_ndx) noexcept REQUIRES(m_file.mutex); + void validate() noexcept REQUIRES(m_file.mutex); + void do_flush(bool skip_validate = false) noexcept REQUIRES(m_file.mutex); + void do_sync() noexcept REQUIRES(m_file.mutex); + REALM_NORETURN void throw_decryption_error(size_t ndx, std::string_view msg) REQUIRES(m_file.mutex); - size_t local_ndx = - ((reinterpret_cast(addr) - reinterpret_cast(m_addr) + offset) >> m_page_shift); - REALM_ASSERT_EX(local_ndx < m_page_state.size(), local_ndx, m_page_state.size(), size_t(addr), size_t(m_addr), - m_page_shift); - return local_ndx; -} + // Mark pages for later checks of the ivs on disk. If the IVs have changed compared to + // the in memory versions the page will later need to be refreshed. + // This is the process by which a reader in a multiprocess scenario detects if its + // mapping should be refreshed while advancing versions. + // The pages marked for IV-checks will be refetched and re-decrypted by later calls to read_barrier. 
+ void mark_pages_for_iv_check() REQUIRES(m_file.mutex); -inline bool EncryptedFileMapping::contains_page(size_t page_in_file) const -{ - // first check for (page_in_file >= m_first_page) so that the following - // subtraction using unsigned types never wraps under 0 - return page_in_file >= m_first_page && page_in_file - m_first_page < m_page_state.size(); -} + void assert_locked() noexcept ASSERT_CAPABILITY(m_file.mutex) {} +}; -#if REALM_DEBUG +// LCOV_EXCL_START inline std::string EncryptedFileMapping::print_debug() { +#if REALM_DEBUG auto state_name = [](const PageState& s) -> std::string { if (s == PageState::Clean) { return "Clean"; } std::string state = "{"; - if (s & PageState::Touched) { - state += "Touched"; - } if (s & PageState::UpToDate) { state += "UpToDate"; } @@ -218,6 +194,8 @@ inline std::string EncryptedFileMapping::print_debug() state += "}"; return state; }; + + util::CheckedLockGuard lock(m_file.mutex); std::string page_states; for (PageState& s : m_page_state) { if (!page_states.empty()) { @@ -227,23 +205,25 @@ inline std::string EncryptedFileMapping::print_debug() } return util::format("%1 pages from %2 to %3: %4", m_page_state.size(), m_first_page, m_page_state.size() + m_first_page, page_states); -} +#else + return ""; #endif // REALM_DEBUG +} +// LCOV_EXCL_STOP -constexpr inline size_t c_min_encrypted_file_size = 8192; - -} // namespace realm::util +constexpr inline File::SizeType c_min_encrypted_file_size = 8192; +#else // REALM_ENABLE_ENCRYPTION +class EncryptedFile { +public: + static void mark_data_as_possibly_stale() noexcept {} +}; +class EncryptedFileMapping {}; #endif // REALM_ENABLE_ENCRYPTION -namespace realm::util { /// Thrown by EncryptedFileMapping if a file opened is non-empty and does not /// contain valid encrypted data struct DecryptionFailed : FileAccessError { - DecryptionFailed() - : FileAccessError(ErrorCodes::DecryptionFailed, get_message_with_bt(""), std::string(), 0) - { - } DecryptionFailed(const std::string& 
msg) : FileAccessError(ErrorCodes::DecryptionFailed, get_message_with_bt(msg), std::string()) { diff --git a/src/realm/util/file.cpp b/src/realm/util/file.cpp index 6a604db8a8e..fc3e823734f 100644 --- a/src/realm/util/file.cpp +++ b/src/realm/util/file.cpp @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -49,27 +50,13 @@ using namespace realm::util; -namespace { -constexpr size_t c_min_supported_page_size = 4096; -size_t get_page_size() -{ -#ifdef _WIN32 - SYSTEM_INFO sysinfo; - GetNativeSystemInfo(&sysinfo); - // DWORD size = sysinfo.dwPageSize; - // On windows we use the allocation granularity instead - DWORD size = sysinfo.dwAllocationGranularity; -#else - long size = sysconf(_SC_PAGESIZE); +#ifndef _WIN32 +// All mainstream platforms other than Windows migrated to 64-bit off_t many +// years ago. Supporting 32-bit off_t is possible, but not currently implemented. +static_assert(sizeof(off_t) == 8 || sizeof(size_t) == 4); #endif - REALM_ASSERT(size > 0 && size % c_min_supported_page_size == 0); - return static_cast(size); -} - -// This variable exists such that page_size() can return the page size without having to make any system calls. -// It could also have been a static local variable, but Valgrind/Helgrind gives a false error on that. 
-std::atomic cached_page_size = get_page_size(); +namespace { bool for_each_helper(const std::string& path, const std::string& dir, realm::util::File::ForEachHandler& handler) { using File = realm::util::File; @@ -412,25 +399,64 @@ std::string make_temp_file(const char* prefix) size_t page_size() { - return cached_page_size.load(std::memory_order::memory_order_relaxed); + static constexpr size_t c_min_supported_page_size = 4096; + static size_t page_size = [] { +#ifdef _WIN32 + SYSTEM_INFO sysinfo; + GetNativeSystemInfo(&sysinfo); + // DWORD size = sysinfo.dwPageSize; + // On windows we use the allocation granularity instead + DWORD size = sysinfo.dwAllocationGranularity; +#else + long size = sysconf(_SC_PAGESIZE); +#endif + REALM_ASSERT(size > 0 && size % c_min_supported_page_size == 0); + return static_cast(size); + }(); + return page_size; } -OnlyForTestingPageSizeChange::OnlyForTestingPageSizeChange(size_t new_page_size) +File::File() = default; +File::File(std::string_view path, Mode m) { - REALM_ASSERT(new_page_size % c_min_supported_page_size == 0); - cached_page_size = new_page_size; + open(path, m); } -OnlyForTestingPageSizeChange::~OnlyForTestingPageSizeChange() +File::~File() noexcept { - cached_page_size = get_page_size(); + close(); } -void File::open_internal(const std::string& path, AccessMode a, CreateMode c, int flags, bool* success) +File::File(File&& f) noexcept +{ + m_fd = std::exchange(f.m_fd, invalid_fd); +#ifdef REALM_FILELOCK_EMULATION + m_pipe_fd = std::exchange(f.m_pipe_fd, invalid_fd); + m_has_exclusive_lock = std::exchange(f.m_has_exclusive_lock, false); +#endif + m_have_lock = std::exchange(f.m_have_lock, false); + m_encryption = std::move(f.m_encryption); +} + +File& File::operator=(File&& f) noexcept +{ + close(); + + m_fd = std::exchange(f.m_fd, invalid_fd); +#ifdef REALM_FILELOCK_EMULATION + m_pipe_fd = std::exchange(f.m_pipe_fd, invalid_fd); + m_has_exclusive_lock = std::exchange(f.m_has_exclusive_lock, false); +#endif + 
m_have_lock = std::exchange(f.m_have_lock, false); + m_encryption = std::move(f.m_encryption); + return *this; +} + + +void File::open_internal(std::string_view path, AccessMode a, CreateMode c, int flags, bool* success) { REALM_ASSERT_RELEASE(!is_attached()); m_path = path; // for error reporting and debugging - m_cached_unique_id = {}; #ifdef _WIN32 // Windows version @@ -464,9 +490,8 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in break; } DWORD flags_and_attributes = 0; - HANDLE handle = CreateFile2(u8path(path).c_str(), desired_access, share_mode, creation_disposition, nullptr); - if (handle != INVALID_HANDLE_VALUE) { - m_fd = handle; + m_fd = CreateFile2(u8path(m_path).c_str(), desired_access, share_mode, creation_disposition, nullptr); + if (m_fd != INVALID_HANDLE_VALUE) { m_have_lock = false; if (success) *success = true; @@ -486,14 +511,14 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in switch (err) { case ERROR_SHARING_VIOLATION: case ERROR_ACCESS_DENIED: - throw FileAccessError(ErrorCodes::PermissionDenied, msg, path, int(err)); + throw FileAccessError(ErrorCodes::PermissionDenied, msg, m_path, int(err)); case ERROR_FILE_NOT_FOUND: case ERROR_PATH_NOT_FOUND: - throw FileAccessError(ErrorCodes::FileNotFound, msg, path, int(err)); + throw FileAccessError(ErrorCodes::FileNotFound, msg, m_path, int(err)); case ERROR_FILE_EXISTS: - throw Exists(msg, path); + throw Exists(msg, m_path); default: - throw FileAccessError(ErrorCodes::FileOperationFailed, msg, path, int(err)); + throw FileAccessError(ErrorCodes::FileOperationFailed, msg, m_path, int(err)); } #else // POSIX version @@ -521,7 +546,7 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in flags2 |= O_TRUNC; if (flags & flag_Append) flags2 |= O_APPEND; - int fd = ::open(path.c_str(), flags2, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH); + int fd = ::open(m_path.c_str(), flags2, S_IRUSR | S_IWUSR | S_IRGRP | 
S_IROTH); if (0 <= fd) { m_fd = fd; m_have_lock = false; @@ -551,7 +576,7 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in msg = util::format("Failed to open file at path '%1': parent directory does not exist", path); throw FileAccessError(ErrorCodes::FileNotFound, msg, path, err); case EEXIST: - throw Exists(msg, path); + throw Exists(msg, m_path); case ENOTDIR: msg = format("Failed to open file at path '%1': parent path is not a directory", path); [[fallthrough]]; @@ -565,57 +590,23 @@ void File::open_internal(const std::string& path, AccessMode a, CreateMode c, in void File::close() noexcept { -#ifdef _WIN32 // Windows version - - if (!m_fd) + m_encryption.reset(); + if (m_fd == invalid_fd) return; if (m_have_lock) unlock(); - +#ifdef _WIN32 // Windows version BOOL r = CloseHandle(m_fd); REALM_ASSERT_RELEASE(r); - m_fd = nullptr; - #else // POSIX version - - if (m_fd < 0) - return; - if (m_have_lock) - unlock(); int r = ::close(m_fd); REALM_ASSERT_RELEASE(r == 0); - m_fd = -1; - #endif -} -void File::close_static(FileDesc fd) -{ -#ifdef _WIN32 - if (!fd) - return; - - if (!CloseHandle(fd)) - throw std::system_error(GetLastError(), std::system_category(), - "CloseHandle() failed from File::close_static()"); -#else - if (fd < 0) - return; - - int ret = -1; - do { - ret = ::close(fd); - } while (ret == -1 && errno == EINTR); - - if (ret != 0) { - int err = errno; // Eliminate any risk of clobbering - if (err == EBADF || err == EIO) - throw SystemError(err, "File::close_static() failed"); - } -#endif + m_fd = invalid_fd; } -size_t File::read_static(FileDesc fd, char* data, size_t size) +size_t File::read_static(FileDesc fd, SizeType pos, char* data, size_t size) { #ifdef _WIN32 // Windows version char* const data_0 = data; @@ -624,27 +615,29 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) if (int_less_than(size, n)) n = static_cast(size); DWORD r = 0; - if (!ReadFile(fd, data, n, &r, 0)) - goto error; - if (r == 
0) - break; - REALM_ASSERT_RELEASE(r <= n); + OVERLAPPED o{}; + o.Offset = static_cast(pos); + o.OffsetHigh = static_cast(pos >> 32); + if (!ReadFile(fd, data, n, &r, &o)) { + DWORD err = GetLastError(); + if (err == ERROR_HANDLE_EOF) + break; + throw SystemError(int(err), "ReadFile() failed"); + } + REALM_ASSERT_RELEASE(r > 0 && r <= n); size -= size_t(r); data += size_t(r); + pos += r; } return data - data_0; -error: - DWORD err = GetLastError(); // Eliminate any risk of clobbering - throw SystemError(int(err), "ReadFile() failed"); - #else // POSIX version char* const data_0 = data; while (0 < size) { // POSIX requires that 'n' is less than or equal to SSIZE_MAX size_t n = std::min(size, size_t(SSIZE_MAX)); - ssize_t r = ::read(fd, data, n); + ssize_t r = pread(fd, data, n, pos); if (r == 0) break; if (r < 0) @@ -652,6 +645,7 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) REALM_ASSERT_RELEASE(size_t(r) <= n); size -= size_t(r); data += size_t(r); + pos += r; } return data - data_0; @@ -663,26 +657,21 @@ size_t File::read_static(FileDesc fd, char* data, size_t size) } -size_t File::read(char* data, size_t size) +size_t File::read(SizeType pos, char* data, size_t size) { REALM_ASSERT_RELEASE(is_attached()); - if (m_encryption_key) { - uint64_t pos_original = File::get_file_pos(m_fd); - REALM_ASSERT(!int_cast_has_overflow(pos_original)); - size_t pos = size_t(pos_original); - Map read_map(*this, access_ReadOnly, static_cast(pos + size)); - realm::util::encryption_read_barrier(read_map, pos, size); - memcpy(data, read_map.get_addr() + pos, size); - uint64_t cur = File::get_file_pos(m_fd); - seek_static(m_fd, cur + size); - return read_map.get_size() - pos; + if (m_encryption) { + Map read_map(*this, pos, access_ReadOnly, size); + util::encryption_read_barrier(read_map, 0, size); + memcpy(data, read_map.get_addr(), size); + return size; } - return read_static(m_fd, data, size); + return read_static(m_fd, pos, data, size); } -void 
File::write_static(FileDesc fd, const char* data, size_t size) +void File::write_static(FileDesc fd, SizeType pos, const char* data, size_t size) { #ifdef _WIN32 while (0 < size) { @@ -690,11 +679,15 @@ void File::write_static(FileDesc fd, const char* data, size_t size) if (int_less_than(size, n)) n = static_cast(size); DWORD r = 0; - if (!WriteFile(fd, data, n, &r, 0)) + OVERLAPPED o{}; + o.Offset = static_cast(pos); + o.OffsetHigh = static_cast(pos >> 32); + if (!WriteFile(fd, data, n, &r, &o)) goto error; REALM_ASSERT_RELEASE(r == n); // Partial writes are not possible. size -= size_t(r); data += size_t(r); + pos += r; } return; @@ -709,13 +702,14 @@ void File::write_static(FileDesc fd, const char* data, size_t size) while (0 < size) { // POSIX requires that 'n' is less than or equal to SSIZE_MAX size_t n = std::min(size, size_t(SSIZE_MAX)); - ssize_t r = ::write(fd, data, n); + ssize_t r = pwrite(fd, data, n, pos); if (r < 0) goto error; // LCOV_EXCL_LINE REALM_ASSERT_RELEASE(r != 0); REALM_ASSERT_RELEASE(size_t(r) <= n); size -= size_t(r); data += size_t(r); + pos += off_t(r); } return; @@ -732,44 +726,39 @@ void File::write_static(FileDesc fd, const char* data, size_t size) #endif } -void File::write(const char* data, size_t size) +void File::write(SizeType pos, const char* data, size_t size) { REALM_ASSERT_RELEASE(is_attached()); - if (m_encryption_key) { - uint64_t pos_original = get_file_pos(m_fd); - REALM_ASSERT(!int_cast_has_overflow(pos_original)); - size_t pos = size_t(pos_original); - Map write_map(*this, access_ReadWrite, static_cast(pos + size)); - realm::util::encryption_read_barrier(write_map, pos, size); - memcpy(write_map.get_addr() + pos, data, size); - realm::util::encryption_write_barrier(write_map, pos, size); - uint64_t cur = get_file_pos(m_fd); - seek(cur + size); + if (m_encryption) { + Map write_map(*this, pos, access_ReadWrite, size); + util::encryption_read_barrier(write_map, 0, size); + memcpy(write_map.get_addr(), data, size); + 
realm::util::encryption_write_barrier(write_map, 0, size); return; } - write_static(m_fd, data, size); + write_static(m_fd, pos, data, size); } -uint64_t File::get_file_pos(FileDesc fd) +File::SizeType File::get_file_pos() { #ifdef _WIN32 LONG high_dword = 0; LARGE_INTEGER li; LARGE_INTEGER res; li.QuadPart = 0; - bool ok = SetFilePointerEx(fd, li, &res, FILE_CURRENT); + bool ok = SetFilePointerEx(m_fd, li, &res, FILE_CURRENT); if (!ok) throw SystemError(GetLastError(), "SetFilePointer() failed"); - return uint64_t(res.QuadPart); + return SizeType(res.QuadPart); #else - auto pos = lseek(fd, 0, SEEK_CUR); + auto pos = lseek(m_fd, 0, SEEK_CUR); if (pos < 0) { throw SystemError(errno, "lseek() failed"); } - return uint64_t(pos); + return SizeType(pos); #endif } @@ -812,12 +801,10 @@ File::SizeType File::get_size() const REALM_ASSERT_RELEASE(is_attached()); File::SizeType size = get_size_static(m_fd); - if (m_encryption_key) { - File::SizeType ret_size = encrypted_size_to_data_size(size); - return ret_size; + if (m_encryption) { + return encrypted_size_to_data_size(size); } - else - return size; + return size; } @@ -825,36 +812,21 @@ void File::resize(SizeType size) { REALM_ASSERT_RELEASE(is_attached()); -#ifdef _WIN32 // Windows version - - // Save file position - SizeType p = get_file_pos(m_fd); - - if (m_encryption_key) + if (m_encryption) size = data_size_to_encrypted_size(size); - // Windows docs say "it is not an error to set the file pointer to a position beyond the end of the file." - // so seeking with SetFilePointerEx() will not error out even if there is no disk space left. - // In this scenario though, the following call to SedEndOfFile() will fail if there is no disk space left. 
- seek(size); - - if (!SetEndOfFile(m_fd)) { +#ifdef _WIN32 // Windows version + FILE_END_OF_FILE_INFO info; + info.EndOfFile.QuadPart = size; + if (!SetFileInformationByHandle(m_fd, FileEndOfFileInfo, &info, sizeof(info))) { DWORD err = GetLastError(); // Eliminate any risk of clobbering if (err == ERROR_HANDLE_DISK_FULL || err == ERROR_DISK_FULL) { - std::string msg = get_last_error_msg("SetEndOfFile() failed: ", err); - throw OutOfDiskSpace(msg); + throw OutOfDiskSpace(get_last_error_msg("SetFileInformationByHandle() failed: ", err)); } - throw SystemError(int(err), "SetEndOfFile() failed"); + throw SystemError(int(err), "SetFileInformationByHandle() failed"); } - - // Restore file position - seek(p); - #else // POSIX version - if (m_encryption_key) - size = data_size_to_encrypted_size(size); - off_t size2; if (int_cast_with_overflow_detect(size, size2)) throw RuntimeError(ErrorCodes::RangeError, "File size overflow"); @@ -874,62 +846,39 @@ void File::resize(SizeType size) } -void File::prealloc(size_t size) +void File::prealloc(SizeType size) { REALM_ASSERT_RELEASE(is_attached()); - - if (size <= to_size_t(get_size())) { + if (size <= get_size()) { return; } - size_t new_size = size; - if (m_encryption_key) { - new_size = static_cast(data_size_to_encrypted_size(size)); - REALM_ASSERT(size == static_cast(encrypted_size_to_data_size(new_size))); - if (new_size < size) { - throw RuntimeError(ErrorCodes::RangeError, "File size overflow: data_size_to_encrypted_size(" + - realm::util::to_string(size) + - ") == " + realm::util::to_string(new_size)); - } + SizeType new_size = size; + if (m_encryption) { + new_size = data_size_to_encrypted_size(size); + REALM_ASSERT(new_size > size); + REALM_ASSERT(size == encrypted_size_to_data_size(new_size)); } auto manually_consume_space = [&]() { - constexpr size_t chunk_size = 4096; - int64_t original_size = get_size_static(m_fd); // raw size - seek(original_size); - size_t num_bytes = size_t(new_size - original_size); + constexpr 
uint16_t chunk_size = 4096; + SizeType write_pos = get_size_static(m_fd); // raw size + SizeType num_bytes = new_size - write_pos; std::string zeros(chunk_size, '\0'); while (num_bytes > 0) { - size_t t = num_bytes > chunk_size ? chunk_size : num_bytes; - write_static(m_fd, zeros.c_str(), t); + uint16_t t = uint16_t(std::min(num_bytes, chunk_size)); + write_static(m_fd, write_pos, zeros.c_str(), t); num_bytes -= t; + write_pos += t; } }; - auto consume_space_interlocked = [&] { -#if REALM_ENABLE_ENCRYPTION - if (m_encryption_key) { - // We need to prevent concurrent calls to lseek from the encryption layer - // while we're writing to the file to extend it. Otherwise an intervening - // lseek may redirect the writing process, causing file corruption. - UniqueLock lck(util::mapping_mutex); - manually_consume_space(); - } - else { - manually_consume_space(); - } -#else - manually_consume_space(); -#endif - }; - #if REALM_HAVE_POSIX_FALLOCATE // Mostly Linux only if (!prealloc_if_supported(0, new_size)) { - consume_space_interlocked(); + manually_consume_space(); } -#else // Non-atomic fallback -#if REALM_PLATFORM_APPLE +#elif REALM_PLATFORM_APPLE // Non-atomic fallback // posix_fallocate() is not supported on MacOS or iOS, so use a combination of fcntl(F_PREALLOCATE) and // ftruncate(). 
@@ -939,22 +888,21 @@ void File::prealloc(size_t size) throw SystemError(err, "fstat() inside prealloc() failed"); } - size_t allocated_size; + SizeType allocated_size; if (int_cast_with_overflow_detect(statbuf.st_blocks, allocated_size)) { throw RuntimeError(ErrorCodes::RangeError, - "Overflow on block conversion to size_t " + realm::util::to_string(statbuf.st_blocks)); + util::format("Overflow on block conversion to SizeType %1", statbuf.st_blocks)); } if (int_multiply_with_overflow_detect(allocated_size, S_BLKSIZE)) { - throw RuntimeError(ErrorCodes::RangeError, "Overflow computing existing file space allocation blocks: " + - realm::util::to_string(allocated_size) + - " block size: " + realm::util::to_string(S_BLKSIZE)); + throw RuntimeError(ErrorCodes::RangeError, + util::format("Overflow computing existing file space allocation blocks: %1 block size %2", + allocated_size, S_BLKSIZE)); } // Only attempt to preallocate space if there's not already sufficient free space in the file. // APFS would fail with EINVAL if we attempted it, and HFS+ would preallocate extra space unnecessarily. // See for details. if (new_size > allocated_size) { - off_t to_allocate = static_cast(new_size - statbuf.st_size); fstore_t store = {F_ALLOCATEALL, F_PEOFPOSMODE, 0, to_allocate, 0}; int ret = 0; @@ -973,7 +921,7 @@ void File::prealloc(size_t size) // 2) fcntl will fail with ENOTSUP on non-supported file systems such as ExFAT. In this case // the fallback should succeed. 
// 3) if there is some other error such as no space left (ENOSPC) we will expect to fail again later - consume_space_interlocked(); + manually_consume_space(); } } @@ -990,18 +938,14 @@ void File::prealloc(size_t size) throw SystemError(err, "ftruncate() inside prealloc() failed"); } #elif REALM_ANDROID || defined(_WIN32) || defined(__EMSCRIPTEN__) - - consume_space_interlocked(); - + manually_consume_space(); #else #error Please check if/how your OS supports file preallocation -#endif - #endif // REALM_HAVE_POSIX_FALLOCATE } -bool File::prealloc_if_supported(SizeType offset, size_t size) +bool File::prealloc_if_supported(SizeType offset, SizeType size) { REALM_ASSERT_RELEASE(is_attached()); @@ -1356,93 +1300,6 @@ void File::rw_unlock() noexcept #endif // REALM_FILELOCK_EMULATION } -void* File::map(AccessMode a, size_t size, int /*map_flags*/, size_t offset) const -{ - return realm::util::mmap({m_fd, m_path, a, m_encryption_key.get()}, size, offset); -} - -void* File::map_fixed(AccessMode a, void* address, size_t size, int /* map_flags */, size_t offset) const -{ - if (m_encryption_key.get()) { - // encryption enabled - this is not supported - see explanation in alloc_slab.cpp - REALM_ASSERT(false); - } -#ifdef _WIN32 - // windows, no encryption - this is not supported, see explanation in alloc_slab.cpp, - // above the method 'update_reader_view()' - REALM_ASSERT(false); - return nullptr; -#else - // unencrypted - mmap part of already reserved space - return realm::util::mmap_fixed(m_fd, address, size, a, offset, m_encryption_key.get()); -#endif -} - -void* File::map_reserve(AccessMode a, size_t size, size_t offset) const -{ - static_cast(a); // FIXME: Consider removing this argument - return realm::util::mmap_reserve(m_fd, size, offset); -} - -#if REALM_ENABLE_ENCRYPTION -void* File::map(AccessMode a, size_t size, EncryptedFileMapping*& mapping, int /*map_flags*/, size_t offset) const -{ - return realm::util::mmap({m_fd, m_path, a, m_encryption_key.get()}, size, 
offset, mapping); -} - -void* File::map_fixed(AccessMode a, void* address, size_t size, EncryptedFileMapping* mapping, int /* map_flags */, - size_t offset) const -{ - if (m_encryption_key.get()) { - // encryption enabled - we shouldn't be here, all memory was allocated by reserve - REALM_ASSERT_RELEASE(false); - } -#ifndef _WIN32 - // no encryption. On Unixes, map relevant part of reserved virtual address range - return realm::util::mmap_fixed(m_fd, address, size, a, offset, nullptr, mapping); -#else - // no encryption - unsupported on windows - REALM_ASSERT(false); - return nullptr; -#endif -} - -void* File::map_reserve(AccessMode a, size_t size, size_t offset, EncryptedFileMapping*& mapping) const -{ - if (m_encryption_key.get()) { - // encrypted file - just mmap it, the encryption layer handles if the mapping extends beyond eof - return realm::util::mmap({m_fd, m_path, a, m_encryption_key.get()}, size, offset, mapping); - } -#ifndef _WIN32 - // not encrypted, do a proper reservation on Unixes' - return realm::util::mmap_reserve({m_fd, m_path, a, nullptr}, size, offset, mapping); -#else - // on windows, this is a no-op - return nullptr; -#endif -} - -#endif // REALM_ENABLE_ENCRYPTION - -void File::unmap(void* addr, size_t size) noexcept -{ - realm::util::munmap(addr, size); -} - - -void* File::remap(void* old_addr, size_t old_size, AccessMode a, size_t new_size, int /*map_flags*/, - size_t file_offset) const -{ - return realm::util::mremap({m_fd, m_path, a, m_encryption_key.get()}, file_offset, old_addr, old_size, new_size); -} - - -void File::sync_map(FileDesc fd, void* addr, size_t size) -{ - realm::util::msync(fd, addr, size); -} - - bool File::exists(const std::string& path) { #if REALM_HAVE_STD_FILESYSTEM @@ -1587,10 +1444,12 @@ bool File::copy(const std::string& origin_path, const std::string& target_path, } size_t buffer_size = 4096; + off_t pos = 0; auto buffer = std::make_unique(buffer_size); // Throws for (;;) { - size_t n = 
origin_file.read(buffer.get(), buffer_size); // Throws - target_file.write(buffer.get(), n); // Throws + size_t n = origin_file.read(pos, buffer.get(), buffer_size); // Throws + target_file.write(pos, buffer.get(), n); // Throws + pos += n; if (n < buffer_size) break; } @@ -1600,26 +1459,6 @@ bool File::copy(const std::string& origin_path, const std::string& target_path, } -bool File::compare(const std::string& path_1, const std::string& path_2) -{ - File file_1{path_1}; // Throws - File file_2{path_2}; // Throws - size_t buffer_size = 4096; - std::unique_ptr buffer_1 = std::make_unique(buffer_size); // Throws - std::unique_ptr buffer_2 = std::make_unique(buffer_size); // Throws - for (;;) { - size_t n_1 = file_1.read(buffer_1.get(), buffer_size); // Throws - size_t n_2 = file_2.read(buffer_2.get(), buffer_size); // Throws - if (n_1 != n_2) - return false; - if (!std::equal(buffer_1.get(), buffer_1.get() + n_1, buffer_2.get())) - return false; - if (n_1 < buffer_size) - break; - } - return true; -} - bool File::is_same_file_static(FileDesc f1, FileDesc f2, const std::string& path1, const std::string& path2) { return get_unique_id(f1, path1) == get_unique_id(f2, path2); @@ -1649,23 +1488,6 @@ FileDesc File::dup_file_desc(FileDesc fd) return fd_duped; } -File::UniqueID File::get_unique_id() -{ - REALM_ASSERT_RELEASE(is_attached()); - File::UniqueID uid = File::get_unique_id(m_fd, m_path); - if (!m_cached_unique_id) { - m_cached_unique_id = std::make_optional(uid); - } - if (m_cached_unique_id != uid) { - throw FileAccessError(ErrorCodes::FileOperationFailed, - util::format("The unique id of this Realm file has changed unexpectedly, this could be " - "due to modifications by an external process '%1'", - m_path), - m_path); - } - return uid; -} - FileDesc File::get_descriptor() const { return m_fd; @@ -1815,12 +1637,10 @@ void File::set_encryption_key(const char* key) { #if REALM_ENABLE_ENCRYPTION if (key) { - auto buffer = std::make_unique(64); - memcpy(buffer.get(), 
key, 64); - m_encryption_key = std::move(buffer); + m_encryption = std::make_unique(key, m_fd); } else { - m_encryption_key.reset(); + m_encryption.reset(); } #else if (key) { @@ -1829,22 +1649,54 @@ void File::set_encryption_key(const char* key) #endif } -const char* File::get_encryption_key() const +EncryptedFile* File::get_encryption() const noexcept { - return m_encryption_key.get(); +#if REALM_ENABLE_ENCRYPTION + return m_encryption.get(); +#else + return nullptr; +#endif } -void File::MapBase::map(const File& f, AccessMode a, size_t size, int map_flags, size_t offset, - util::WriteObserver* observer) +File::MapBase::MapBase() noexcept = default; +File::MapBase::~MapBase() noexcept +{ + unmap(); +} + +File::MapBase::MapBase(MapBase&& other) noexcept +{ + *this = std::move(other); +} + +File::MapBase& File::MapBase::operator=(MapBase&& other) noexcept +{ + REALM_ASSERT(this != &other); + if (m_addr) + unmap(); + m_addr = std::exchange(other.m_addr, nullptr); + m_size = std::exchange(other.m_size, 0); + m_access_mode = other.m_access_mode; + m_reservation_size = std::exchange(other.m_reservation_size, 0); + m_offset = std::exchange(other.m_offset, 0); + m_fd = std::exchange(other.m_fd, invalid_fd); +#if REALM_ENABLE_ENCRYPTION + m_encrypted_mapping = std::move(other.m_encrypted_mapping); +#endif + return *this; +} + +void File::MapBase::map(const File& f, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { REALM_ASSERT(!m_addr); #if REALM_ENABLE_ENCRYPTION - m_addr = f.map(a, size, m_encrypted_mapping, map_flags, offset); + m_addr = mmap({f.m_fd, a, f.m_encryption.get()}, size, offset, m_encrypted_mapping); if (observer && m_encrypted_mapping) { m_encrypted_mapping->set_observer(observer); } #else - m_addr = f.map(a, size, map_flags, offset); + std::unique_ptr dummy_encrypted_mapping; + m_addr = mmap({f.m_fd, a, nullptr}, size, offset, dummy_encrypted_mapping); static_cast(observer); #endif m_size = m_reservation_size = size; @@ -1860,25 
+1712,15 @@ void File::MapBase::unmap() noexcept return; REALM_ASSERT(m_reservation_size); #if REALM_ENABLE_ENCRYPTION - if (m_encrypted_mapping) { - m_encrypted_mapping = nullptr; - util::remove_encrypted_mapping(m_addr, m_size); - } + m_encrypted_mapping = nullptr; #endif - ::munmap(m_addr, m_reservation_size); + munmap(m_addr, m_reservation_size); m_addr = nullptr; m_size = 0; m_reservation_size = 0; } -void File::MapBase::remap(const File& f, AccessMode a, size_t size, int map_flags) -{ - REALM_ASSERT(m_addr); - m_addr = f.remap(m_addr, m_size, a, size, map_flags); - m_size = m_reservation_size = size; -} - -bool File::MapBase::try_reserve(const File& file, AccessMode a, size_t size, size_t offset, +bool File::MapBase::try_reserve(const File& file, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { #ifdef _WIN32 @@ -1896,9 +1738,8 @@ bool File::MapBase::try_reserve(const File& file, AccessMode a, size_t size, siz m_fd = file.get_descriptor(); m_offset = offset; #if REALM_ENABLE_ENCRYPTION - if (file.m_encryption_key) { - m_encrypted_mapping = - util::reserve_mapping(addr, {m_fd, file.get_path(), a, file.m_encryption_key.get()}, offset); + if (file.m_encryption) { + m_encrypted_mapping = util::reserve_mapping(addr, {m_fd, a, file.m_encryption.get()}, offset); if (observer) { m_encrypted_mapping->set_observer(observer); } @@ -1915,7 +1756,6 @@ bool File::MapBase::try_extend_to(size_t size) noexcept if (size > m_reservation_size) { return false; } - // return false; #ifndef _WIN32 char* extension_start_addr = (char*)m_addr + m_size; size_t extension_size = size - m_size; @@ -1927,14 +1767,14 @@ bool File::MapBase::try_extend_to(size_t size) noexcept if (got_addr == MAP_FAILED) return false; REALM_ASSERT(got_addr == extension_start_addr); - util::extend_encrypted_mapping(m_encrypted_mapping, m_addr, m_offset, m_size, size); m_size = size; + m_encrypted_mapping->extend_to(m_offset, size); return true; } #endif try { - void* got_addr = 
util::mmap_fixed(m_fd, extension_start_addr, extension_size, m_access_mode, - extension_start_offset, nullptr); + void* got_addr = + util::mmap_fixed(m_fd, extension_start_addr, extension_size, m_access_mode, extension_start_offset); if (got_addr == extension_start_addr) { m_size = size; return true; @@ -1950,17 +1790,25 @@ bool File::MapBase::try_extend_to(size_t size) noexcept void File::MapBase::sync() { REALM_ASSERT(m_addr); +#if REALM_ENABLE_ENCRYPTION + if (m_encrypted_mapping) { + m_encrypted_mapping->sync(); + return; + } +#endif - File::sync_map(m_fd, m_addr, m_size); + realm::util::msync(m_fd, m_addr, m_size); } -void File::MapBase::flush() +void File::MapBase::flush(bool skip_validate) { REALM_ASSERT(m_addr); #if REALM_ENABLE_ENCRYPTION if (m_encrypted_mapping) { - realm::util::encryption_flush(m_encrypted_mapping); + m_encrypted_mapping->flush(skip_validate); } +#else + static_cast(skip_validate); #endif } @@ -2003,13 +1851,10 @@ File::SizeType File::get_free_space(const std::string& path) DirScanner::DirScanner(const std::string& path, bool allow_missing) { - try { - m_iterator = std::filesystem::directory_iterator(u8path(path)); - } - catch (const std::filesystem::filesystem_error& e) { - if (e.code() != std::errc::no_such_file_or_directory || !allow_missing) - throw; - } + std::error_code ec; + m_iterator = std::filesystem::directory_iterator(u8path(path), ec); + if (ec && (ec != std::errc::no_such_file_or_directory || !allow_missing)) + throw std::filesystem::filesystem_error("directory_iterator::directory_iterator", u8path(path), ec); } DirScanner::~DirScanner() = default; diff --git a/src/realm/util/file.hpp b/src/realm/util/file.hpp index 6fc910280ca..450a91b908a 100644 --- a/src/realm/util/file.hpp +++ b/src/realm/util/file.hpp @@ -54,6 +54,7 @@ namespace realm::util { +class EncryptedFile; class EncryptedFileMapping; class WriteObserver; @@ -114,11 +115,6 @@ std::string make_temp_file(const char* prefix); size_t page_size(); -struct 
OnlyForTestingPageSizeChange { - OnlyForTestingPageSizeChange(size_t new_page_size); - ~OnlyForTestingPageSizeChange(); -}; - /// This class provides a RAII abstraction over the concept of a file /// descriptor (or file handle). /// @@ -149,13 +145,13 @@ class File { mode_Append ///< access_ReadWrite, create_Auto, flag_Append (fopen: ab+) }; - /// Equivalent to calling open(const std::string&, Mode) on a + /// Equivalent to calling open(std::string_view, Mode) on a /// default constructed instance. - explicit File(const std::string& path, Mode = mode_Read); + explicit File(std::string_view path, Mode = mode_Read); /// Create an instance that is not initially attached to an open /// file. - File() = default; + File(); ~File() noexcept; File(File&&) noexcept; @@ -174,13 +170,12 @@ class File { /// derived from AccessError, the derived exception type is thrown /// (as long as the underlying system provides the information to /// unambiguously distinguish that particular reason). - void open(const std::string& path, Mode = mode_Read); + void open(std::string_view path, Mode = mode_Read); /// This function is idempotent, that is, it is valid to call it /// regardless of whether this instance currently is attached to /// an open file. void close() noexcept; - static void close_static(FileDesc fd); // throws /// Check whether this File instance is currently attached to an /// open file. @@ -202,18 +197,21 @@ class File { flag_Append = 2 ///< Move to end of file before each write. }; - /// See open(const std::string&, Mode). + /// See open(std::string_view, Mode). /// /// Specifying access_ReadOnly together with a create mode that is /// not create_Never, or together with a non-zero \a flags /// argument, results in undefined behavior. Specifying flag_Trunc /// together with create_Must results in undefined behavior. 
- void open(const std::string& path, AccessMode, CreateMode, int flags); + void open(std::string_view path, AccessMode, CreateMode, int flags); /// Same as open(path, access_ReadWrite, create_Auto, 0), except /// that this one returns an indication of whether a new file was /// created, or an existing file was opened. - void open(const std::string& path, bool& was_created); + void open(std::string_view path, bool& was_created); + + /// Plays the same role as off_t in POSIX + typedef int_fast64_t SizeType; /// Read data into the specified buffer and return the number of /// bytes read. If the returned number of bytes is less than \a @@ -221,8 +219,8 @@ class File { /// /// Calling this function on an instance, that is not currently /// attached to an open file, has undefined behavior. - size_t read(char* data, size_t size); - static size_t read_static(FileDesc fd, char* data, size_t size); + size_t read(SizeType pos, char* data, size_t size); + static size_t read_static(FileDesc fd, SizeType pos, char* data, size_t size); /// Write the specified data to this file. /// @@ -231,35 +229,32 @@ class File { /// /// Calling this function on an instance, that was opened in /// read-only mode, has undefined behavior. - void write(const char* data, size_t size); - static void write_static(FileDesc fd, const char* data, size_t size); + void write(SizeType pos, const char* data, size_t size); + static void write_static(FileDesc fd, SizeType pos, const char* data, size_t size); // Tells current file pointer of fd - static uint64_t get_file_pos(FileDesc fd); + SizeType get_file_pos(); /// Calls write(s.data(), s.size()). - void write(const std::string& s) + void write(SizeType pos, std::string_view s) { - write(s.data(), s.size()); + write(pos, s.data(), s.size()); } /// Calls read(data, N). template - size_t read(char (&data)[N]) + size_t read(SizeType pos, char (&data)[N]) { - return read(data, N); + return read(pos, data, N); } /// Calls write(data(), N). 
template - void write(const char (&data)[N]) + void write(SizeType pos, const char (&data)[N]) { - write(data, N); + write(pos, data, N); } - /// Plays the same role as off_t in POSIX - typedef int_fast64_t SizeType; - /// Calling this function on an instance that is not attached to /// an open file has undefined behavior. SizeType get_size() const; @@ -287,7 +282,7 @@ class File { /// through distinct File instances. /// /// \sa prealloc_if_supported() - void prealloc(size_t new_size); + void prealloc(SizeType new_size); /// When supported by the system, allocate space on the target /// device for the specified region of the file. If the region @@ -308,7 +303,7 @@ class File { /// /// \sa prealloc() /// \sa is_prealloc_supported() - bool prealloc_if_supported(SizeType offset, size_t size); + bool prealloc_if_supported(SizeType offset, SizeType size); /// See prealloc_if_supported(). static bool is_prealloc_supported(); @@ -394,22 +389,11 @@ class File { /// \param key A 64-byte encryption key, or null to disable encryption. void set_encryption_key(const char* key); - /// Get the encryption key set by set_encryption_key(), - /// null_ptr if no key set. - const char* get_encryption_key() const; + EncryptedFile* get_encryption() const noexcept; /// Set the path used for emulating file locks. If not set explicitly, /// the emulation will use the path of the file itself suffixed by ".fifo" void set_fifo_path(const std::string& fifo_dir_path, const std::string& fifo_file_name); - enum { - /// If possible, disable opportunistic flushing of dirted - /// pages of a memory mapped file to physical medium. On some - /// systems this cannot be disabled. On other systems it is - /// the default behavior. An explicit call to sync_map() will - /// flush the buffers regardless of whether this flag is - /// specified or not. - map_NoSync = 1 - }; /// Map this file into memory. The file is mapped as shared /// memory. 
This allows two processes to interact under exatly the @@ -431,39 +415,6 @@ class File { /// /// Calling this function with a size that is greater than the /// size of the file has undefined behavior. - void* map(AccessMode, size_t size, int map_flags = 0, size_t offset = 0) const; - void* map_fixed(AccessMode, void* address, size_t size, int map_flags = 0, size_t offset = 0) const; - void* map_reserve(AccessMode, size_t size, size_t offset) const; - /// The same as unmap(old_addr, old_size) followed by map(a, - /// new_size, map_flags), but more efficient on some systems. - /// - /// The old address range must have been acquired by a call to - /// map() or remap() on this File instance, the specified access - /// mode and flags must be the same as the ones specified - /// previously, and this File instance must not have been reopend - /// in the meantime. Failing to adhere to these rules will result - /// in undefined behavior. - /// - /// If this function throws, the old address range will remain - /// mapped. - void* remap(void* old_addr, size_t old_size, AccessMode a, size_t new_size, int map_flags = 0, - size_t file_offset = 0) const; - -#if REALM_ENABLE_ENCRYPTION - void* map(AccessMode, size_t size, EncryptedFileMapping*& mapping, int map_flags = 0, size_t offset = 0) const; - void* map_fixed(AccessMode, void* address, size_t size, EncryptedFileMapping* mapping, int map_flags = 0, - size_t offset = 0) const; - void* map_reserve(AccessMode, size_t size, size_t offset, EncryptedFileMapping*& mapping) const; -#endif - /// Unmap the specified address range which must have been - /// previously returned by map(). - static void unmap(void* addr, size_t size) noexcept; - - /// Flush in-kernel buffers to disk. This blocks the caller until - /// the synchronization operation is complete. The specified - /// address range must be (a subset of) one that was previously returned by - /// map(). 
- static void sync_map(FileDesc fd, void* addr, size_t size); /// Check whether the specified file or directory exists. Note /// that a file or directory that resides in a directory that the @@ -522,10 +473,6 @@ class File { /// Copy the file at the specified origin path to the specified target path. static bool copy(const std::string& origin_path, const std::string& target_path, bool overwrite_existing = true); - /// Compare the two files at the specified paths for equality. Returns true - /// if, and only if they are equal. - static bool compare(const std::string& path_1, const std::string& path_2); - /// Check whether two open file descriptors refer to the same /// underlying file, that is, if writing via one of them, will /// affect what is read from the other. In UNIX this boils down to @@ -609,9 +556,6 @@ class File { uint_fast64_t inode; #endif }; - // Return the unique id for the current opened file descriptor. - // Same UniqueID means they are the same file. - UniqueID get_unique_id(); // Throws // Return the file descriptor for the file FileDesc get_descriptor() const; // Return the path of the open file, or an empty string if @@ -636,25 +580,26 @@ class File { class Streambuf; private: - bool m_have_lock = false; // Only valid when m_fd is not null #ifdef _WIN32 - HANDLE m_fd = nullptr; + static inline const HANDLE invalid_fd = INVALID_HANDLE_VALUE; #else - int m_fd = -1; + static inline const int invalid_fd = -1; +#endif + + FileDesc m_fd = invalid_fd; + bool m_have_lock = false; // Only valid when m_fd is not null #ifdef REALM_FILELOCK_EMULATION - int m_pipe_fd = -1; // -1 if no pipe has been allocated for emulation bool m_has_exclusive_lock = false; + int m_pipe_fd = -1; // -1 if no pipe has been allocated for emulation std::string m_fifo_dir_path; std::string m_fifo_path; #endif -#endif - std::unique_ptr m_encryption_key = nullptr; + std::unique_ptr m_encryption; std::string m_path; - std::optional m_cached_unique_id; bool lock(bool exclusive, bool 
non_blocking); bool rw_lock(bool exclusive, bool non_blocking); - void open_internal(const std::string& path, AccessMode, CreateMode, int flags, bool* success); + void open_internal(std::string_view path, AccessMode, CreateMode, int flags, bool* success); #ifdef REALM_FILELOCK_EMULATION bool has_shared_lock() const noexcept @@ -667,11 +612,11 @@ class File { void* m_addr = nullptr; mutable size_t m_size = 0; size_t m_reservation_size = 0; - size_t m_offset = 0; - FileDesc m_fd; + uint64_t m_offset = 0; + FileDesc m_fd = invalid_fd; AccessMode m_access_mode = access_ReadOnly; - MapBase() noexcept = default; + MapBase() noexcept; ~MapBase() noexcept; // Disable copying. Copying an opened MapBase will create a scenario @@ -679,19 +624,19 @@ class File { MapBase(const MapBase&) = delete; MapBase& operator=(const MapBase&) = delete; - // Use - void map(const File&, AccessMode, size_t size, int map_flags, size_t offset = 0, - util::WriteObserver* observer = nullptr); + MapBase(MapBase&& other) noexcept; + MapBase& operator=(MapBase&& other) noexcept; + + void map(const File&, AccessMode, size_t size, SizeType offset = 0, util::WriteObserver* observer = nullptr); // reserve address space for later mapping operations. // returns false if reservation can't be done. - bool try_reserve(const File&, AccessMode, size_t size, size_t offset = 0, + bool try_reserve(const File&, AccessMode, size_t size, SizeType offset = 0, util::WriteObserver* observer = nullptr); - void remap(const File&, AccessMode, size_t size, int map_flags); void unmap() noexcept; // fully update any process shared representation (e.g. buffer cache). // other processes will be able to see changes, but a full platform crash // may loose data - void flush(); + void flush(bool skip_validate = false); // try to extend the mapping in-place. 
Virtual address space must have // been set aside earlier by a call to reserve() bool try_extend_to(size_t size) noexcept; @@ -699,13 +644,13 @@ class File { // crash will *not* have lost data. void sync(); #if REALM_ENABLE_ENCRYPTION - mutable util::EncryptedFileMapping* m_encrypted_mapping = nullptr; - inline util::EncryptedFileMapping* get_encrypted_mapping() const + mutable std::unique_ptr m_encrypted_mapping; + util::EncryptedFileMapping* get_encrypted_mapping() const { - return m_encrypted_mapping; + return m_encrypted_mapping.get(); } #else - inline util::EncryptedFileMapping* get_encrypted_mapping() const + util::EncryptedFileMapping* get_encrypted_mapping() const { return nullptr; } @@ -713,7 +658,6 @@ class File { }; }; - /// This class provides a RAII abstraction over the concept of a /// memory mapped file. /// @@ -733,15 +677,15 @@ template class File::Map : private MapBase { public: /// Equivalent to calling map() on a default constructed instance. - explicit Map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0, + explicit Map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), util::WriteObserver* observer = nullptr); - explicit Map(const File&, size_t offset, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0, + explicit Map(const File&, SizeType offset, AccessMode = access_ReadOnly, size_t size = sizeof(T), util::WriteObserver* observer = nullptr); /// Create an instance that is not initially attached to a memory /// mapped file. - Map() noexcept; + Map() noexcept = default; // Disable copying. Copying an opened Map will create a scenario // where the same memory will be mapped once but unmapped twice. 
@@ -749,30 +693,8 @@ class File::Map : private MapBase { Map& operator=(const Map&) = delete; /// Move the mapping from another Map object to this Map object - File::Map& operator=(File::Map&& other) noexcept - { - REALM_ASSERT(this != &other); - if (m_addr) - unmap(); - m_addr = other.get_addr(); - m_size = other.m_size; - m_access_mode = other.m_access_mode; - m_reservation_size = other.m_reservation_size; - m_offset = other.m_offset; - m_fd = other.m_fd; - other.m_offset = 0; - other.m_addr = nullptr; - other.m_size = other.m_reservation_size = 0; -#if REALM_ENABLE_ENCRYPTION - m_encrypted_mapping = other.m_encrypted_mapping; - other.m_encrypted_mapping = nullptr; -#endif - return *this; - } - Map(Map&& other) noexcept - { - *this = std::move(other); - } + File::Map& operator=(File::Map&& other) noexcept = default; + Map(Map&& other) noexcept = default; /// See File::map(). /// @@ -780,7 +702,7 @@ class File::Map : private MapBase { /// attached to a memory mapped file has undefined behavior. The /// returned pointer is the same as what will subsequently be /// returned by get_addr(). - T* map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0, size_t offset = 0, + T* map(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), SizeType offset = 0, util::WriteObserver* observer = nullptr); /// See File::unmap(). This function is idempotent, that is, it is @@ -788,16 +710,21 @@ class File::Map : private MapBase { /// currently attached to a memory mapped file. void unmap() noexcept; - bool try_reserve(const File&, AccessMode a = access_ReadOnly, size_t size = sizeof(T), size_t offset = 0, + bool try_reserve(const File&, AccessMode a = access_ReadOnly, size_t size = sizeof(T), SizeType offset = 0, util::WriteObserver* observer = nullptr); - /// See File::remap(). + /// The same as unmap(old_addr, old_size) followed by map(a, + /// new_size, map_flags), but more efficient on some systems. 
+ /// /// /// Calling this function on a Map instance that is not currently attached /// to a memory mapped file is equivalent to calling map(). The returned /// pointer is the same as what will subsequently be returned by /// get_addr(). - T* remap(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T), int map_flags = 0); + /// + /// If this function throws, the old address range will remain + /// mapped. + T* remap(const File&, AccessMode = access_ReadOnly, size_t size = sizeof(T)); /// Try to extend the existing mapping to a given size bool try_extend_to(size_t size) noexcept; @@ -806,9 +733,9 @@ class File::Map : private MapBase { /// /// Calling this function on an instance that is not currently /// attached to a memory mapped file, has undefined behavior. - void sync(); + using MapBase::flush; + using MapBase::sync; - void flush(); /// Check whether this Map instance is currently attached to a /// memory mapped file. bool is_attached() const noexcept; @@ -834,18 +761,8 @@ class File::Map : private MapBase { return m_access_mode == access_ReadWrite; } -#if REALM_ENABLE_ENCRYPTION /// Get the encrypted file mapping corresponding to this mapping - inline EncryptedFileMapping* get_encrypted_mapping() const - { - return m_encrypted_mapping; - } -#else - inline EncryptedFileMapping* get_encrypted_mapping() const - { - return nullptr; - } -#endif + using MapBase::get_encrypted_mapping; friend class UnmapGuard; }; @@ -967,16 +884,6 @@ class DirScanner { // Implementation: -inline File::File(const std::string& path, Mode m) -{ - open(path, m); -} - -inline File::~File() noexcept -{ - close(); -} - inline void File::set_fifo_path(const std::string& fifo_dir_path, const std::string& fifo_file_name) { #ifdef REALM_FILELOCK_EMULATION @@ -988,49 +895,7 @@ inline void File::set_fifo_path(const std::string& fifo_dir_path, const std::str #endif } -inline File::File(File&& f) noexcept -{ -#ifdef _WIN32 - m_fd = f.m_fd; - f.m_fd = nullptr; -#else - m_fd = f.m_fd; 
-#ifdef REALM_FILELOCK_EMULATION - m_pipe_fd = f.m_pipe_fd; - m_has_exclusive_lock = f.m_has_exclusive_lock; - f.m_has_exclusive_lock = false; - f.m_pipe_fd = -1; -#endif - f.m_fd = -1; -#endif - m_have_lock = f.m_have_lock; - f.m_have_lock = false; - m_encryption_key = std::move(f.m_encryption_key); -} - -inline File& File::operator=(File&& f) noexcept -{ - close(); -#ifdef _WIN32 - m_fd = f.m_fd; - f.m_fd = nullptr; -#else - m_fd = f.m_fd; - f.m_fd = -1; -#ifdef REALM_FILELOCK_EMULATION - m_pipe_fd = f.m_pipe_fd; - f.m_pipe_fd = -1; - m_has_exclusive_lock = f.m_has_exclusive_lock; - f.m_has_exclusive_lock = false; -#endif -#endif - m_have_lock = f.m_have_lock; - f.m_have_lock = false; - m_encryption_key = std::move(f.m_encryption_key); - return *this; -} - -inline void File::open(const std::string& path, Mode m) +inline void File::open(std::string_view path, Mode m) { AccessMode a = access_ReadWrite; CreateMode c = create_Auto; @@ -1053,13 +918,13 @@ inline void File::open(const std::string& path, Mode m) open(path, a, c, flags); } -inline void File::open(const std::string& path, AccessMode am, CreateMode cm, int flags) +inline void File::open(std::string_view path, AccessMode am, CreateMode cm, int flags) { open_internal(path, am, cm, flags, nullptr); } -inline void File::open(const std::string& path, bool& was_created) +inline void File::open(std::string_view path, bool& was_created) { while (1) { bool success; @@ -1078,11 +943,7 @@ inline void File::open(const std::string& path, bool& was_created) inline bool File::is_attached() const noexcept { -#ifdef _WIN32 - return (m_fd != nullptr); -#else - return 0 <= m_fd; -#endif + return m_fd != invalid_fd; } inline void File::rw_lock_shared() @@ -1110,40 +971,28 @@ inline bool File::try_lock() return lock(true, true); } -inline File::MapBase::~MapBase() noexcept -{ - unmap(); -} - template -inline File::Map::Map(const File& f, AccessMode a, size_t size, int map_flags, util::WriteObserver* observer) +inline 
File::Map::Map(const File& f, AccessMode a, size_t size, util::WriteObserver* observer) { - map(f, a, size, map_flags, 0, observer); + map(f, a, size, 0, observer); } template -inline File::Map::Map(const File& f, size_t offset, AccessMode a, size_t size, int map_flags, - util::WriteObserver* observer) +inline File::Map::Map(const File& f, SizeType offset, AccessMode a, size_t size, util::WriteObserver* observer) { - map(f, a, size, map_flags, offset, observer); + map(f, a, size, offset, observer); } template -inline File::Map::Map() noexcept +inline T* File::Map::map(const File& f, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { -} - -template -inline T* File::Map::map(const File& f, AccessMode a, size_t size, int map_flags, size_t offset, - util::WriteObserver* observer) -{ - MapBase::map(f, a, size, map_flags, offset, observer); + MapBase::map(f, a, size, offset, observer); return static_cast(m_addr); } template -inline bool File::Map::try_reserve(const File& f, AccessMode a, size_t size, size_t offset, +inline bool File::Map::try_reserve(const File& f, AccessMode a, size_t size, SizeType offset, util::WriteObserver* observer) { return MapBase::try_reserve(f, a, size, offset, observer); @@ -1156,13 +1005,11 @@ inline void File::Map::unmap() noexcept } template -inline T* File::Map::remap(const File& f, AccessMode a, size_t size, int map_flags) +inline T* File::Map::remap(const File& f, AccessMode a, size_t size) { - // MapBase::remap(f, a, size, map_flags); // missing sync() here? 
unmap(); - map(f, a, size, map_flags); - + map(f, a, size); return static_cast(m_addr); } @@ -1172,18 +1019,6 @@ inline bool File::Map::try_extend_to(size_t size) noexcept return MapBase::try_extend_to(sizeof(T) * size); } -template -inline void File::Map::sync() -{ - MapBase::sync(); -} - -template -inline void File::Map::flush() -{ - MapBase::flush(); -} - template inline bool File::Map::is_attached() const noexcept { @@ -1207,7 +1042,7 @@ inline T* File::Map::release() noexcept { T* addr = static_cast(m_addr); m_addr = nullptr; - m_fd = 0; + m_fd = invalid_fd; return addr; } @@ -1261,8 +1096,10 @@ inline void File::Streambuf::flush() { size_t n = pptr() - pbase(); if (n > 0) { - m_file.write(pbase(), n); + SizeType pos = m_file.get_file_pos(); + m_file.write(pos, pbase(), n); setp(m_buffer.get(), epptr()); + m_file.seek(pos + n); } } @@ -1312,7 +1149,6 @@ inline bool operator>=(const File::UniqueID& lhs, const File::UniqueID& rhs) { return !(lhs < rhs); } - } // namespace realm::util #endif // REALM_UTIL_FILE_HPP diff --git a/src/realm/util/file_mapper.cpp b/src/realm/util/file_mapper.cpp index 38cbed71bcd..3b35ffa91c0 100644 --- a/src/realm/util/file_mapper.cpp +++ b/src/realm/util/file_mapper.cpp @@ -16,9 +16,15 @@ * **************************************************************************/ +#include + +#include +#include +#include #include +#include -#include +#include #ifdef _WIN32 #include @@ -27,16 +33,12 @@ #include #endif -#include -#include -#include -#include -#include - #if REALM_ENABLE_ENCRYPTION #include #include +#include +#include #include #include @@ -44,15 +46,7 @@ #include #include #include -#include -#include -#include #include - -#include -#include -#include -#include #include // for memset #if REALM_PLATFORM_APPLE @@ -62,572 +56,102 @@ #endif // enable encryption namespace { - inline bool is_mmap_memory_error(int err) { return (err == EAGAIN || err == EMFILE || err == ENOMEM); } - } // Unnamed namespace -using namespace realm; -using 
namespace realm::util; - -namespace realm { -namespace util { - +namespace realm::util { size_t round_up_to_page_size(size_t size) noexcept { - return (size + page_size() - 1) & ~(page_size() - 1); -} - - -#if REALM_ENABLE_ENCRYPTION - -// A list of all of the active encrypted mappings for a single file -struct mappings_for_file { - File::UniqueID file_unique_id; - std::shared_ptr info; -}; - -// Group the information we need to map a SIGSEGV address to an -// EncryptedFileMapping for the sake of cache-friendliness with 3+ active -// mappings (and no worse with only two) -struct mapping_and_addr { - std::shared_ptr mapping; - void* addr; - size_t size; -}; - -util::Mutex& mapping_mutex = *(new util::Mutex); -namespace { -std::vector& mappings_by_addr = *new std::vector; -std::vector& mappings_by_file = *new std::vector; -static unsigned int file_reclaim_index = 0; -static std::atomic num_decrypted_pages(0); // this is for statistical purposes -static std::atomic reclaimer_target(0); // do. -static std::atomic reclaimer_workload(0); // do. 
-// helpers - -int64_t fetch_value_in_file(const std::string& fname, const char* scan_pattern) -{ - std::ifstream file(fname); - if (file) { - std::stringstream buffer; - buffer << file.rdbuf(); - - std::string s = buffer.str(); - std::smatch m; - std::regex e(scan_pattern); - - if (std::regex_search(s, m, e)) { - std::string ibuf = m[1]; - return strtol(ibuf.c_str(), nullptr, 10); - } - } - return PageReclaimGovernor::no_match; + auto ps = page_size(); + return (size + ps - 1) & ~(ps - 1); } -/* Default reclaim governor - * - */ - -class DefaultGovernor : public PageReclaimGovernor { -public: - static int64_t pick_lowest_valid(int64_t a, int64_t b) - { - if (a == PageReclaimGovernor::no_match) - return b; - if (b == PageReclaimGovernor::no_match) - return a; - return std::min(a, b); - } - - static int64_t pick_if_valid(int64_t source, int64_t target) - { - if (source == PageReclaimGovernor::no_match) - return PageReclaimGovernor::no_match; - return target; - } - - static int64_t get_target_from_system(const std::string& cfg_file_name) - { - int64_t target; - auto local_spec = fetch_value_in_file(cfg_file_name, "target ([[:digit:]]+)"); - if (local_spec != no_match) { // overrides everything! 
- target = local_spec; - } - else { - // no local spec, try to deduce something reasonable from platform info - auto from_proc = fetch_value_in_file("/proc/meminfo", "MemTotal:[[:space:]]+([[:digit:]]+) kB") * 1024; - auto from_cgroup = fetch_value_in_file("/sys/fs/cgroup/memory/memory.limit_in_bytes", "^([[:digit:]]+)"); - auto cache_use = fetch_value_in_file("/sys/fs/cgroup/memory/memory.stat", "cache ([[:digit:]]+)"); - target = pick_if_valid(from_proc, from_proc / 4); - target = pick_lowest_valid(target, pick_if_valid(from_cgroup, from_cgroup / 4)); - target = pick_lowest_valid(target, pick_if_valid(cache_use, cache_use)); - } - return target; - } - - util::UniqueFunction current_target_getter(size_t load) override - { - static_cast(load); - if (m_refresh_count > 0) { - --m_refresh_count; - return [target = m_target] { - return target; - }; - } - m_refresh_count = 10; - - return [file_name = m_cfg_file_name] { - return get_target_from_system(file_name); - }; - } - - void report_target_result(int64_t target) override - { - m_target = target; - } - - DefaultGovernor() - { - auto cfg_name = getenv("REALM_PAGE_GOVERNOR_CFG"); - if (cfg_name) { - m_cfg_file_name = cfg_name; - } - } - -private: - std::string m_cfg_file_name; - int64_t m_target = 0; - int m_refresh_count = 0; -}; - -static DefaultGovernor default_governor; -static PageReclaimGovernor* governor = &default_governor; - -void reclaim_pages(); - -#if !REALM_PLATFORM_APPLE -static std::atomic reclaimer_shutdown(false); -static std::unique_ptr reclaimer_thread; - -static void ensure_reclaimer_thread_runs() +void* mmap(const FileAttributes& file, size_t size, uint64_t offset, std::unique_ptr& mapping) { - if (reclaimer_thread == nullptr) { - reclaimer_thread = std::make_unique([] { - while (!reclaimer_shutdown) { - reclaim_pages(); - millisleep(1000); - } - }); - } -} - -struct ReclaimerThreadStopper { - ~ReclaimerThreadStopper() - { - if (reclaimer_thread) { - reclaimer_shutdown = true; - 
reclaimer_thread->join(); - } - } -} reclaimer_thread_stopper; -#else // REALM_PLATFORM_APPLE -static dispatch_source_t reclaimer_timer; -static dispatch_queue_t reclaimer_queue; + _impl::SimulatedFailure::trigger_mmap(size); -static void ensure_reclaimer_thread_runs() -{ - if (!reclaimer_timer) { - reclaimer_queue = dispatch_queue_create_with_target("io.realm.page-reclaimer", DISPATCH_QUEUE_SERIAL, - dispatch_get_global_queue(QOS_CLASS_BACKGROUND, 0)); - reclaimer_timer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, reclaimer_queue); - dispatch_source_set_timer(reclaimer_timer, DISPATCH_TIME_NOW, NSEC_PER_SEC, NSEC_PER_SEC); - dispatch_source_set_event_handler(reclaimer_timer, ^{ - reclaim_pages(); +#if REALM_ENABLE_ENCRYPTION + if (file.encryption) { + auto page_start = offset & ~(page_size() - 1); + size += size_t(offset - page_start); + size = round_up_to_page_size(size); + void* addr = mmap_anon(size); + ScopeExitFail cleanup([&]() noexcept { + munmap(addr, size); }); - dispatch_resume(reclaimer_timer); + mapping = file.encryption->add_mapping(page_start, addr, size, file.access); + return static_cast(addr) - page_start + offset; } -} - -struct ReclaimerThreadStopper { - ~ReclaimerThreadStopper() - { - if (reclaimer_timer) { - dispatch_source_cancel(reclaimer_timer); - // Block until any currently-running timer tasks are done - dispatch_sync(reclaimer_queue, ^{ - }); - dispatch_release(reclaimer_timer); - dispatch_release(reclaimer_queue); - } - } -} reclaimer_thread_stopper; + mapping = nullptr; +#else + static_cast(mapping); #endif -} // anonymous namespace - -void set_page_reclaim_governor(PageReclaimGovernor* new_governor) -{ - UniqueLock lock(mapping_mutex); - governor = new_governor ? 
new_governor : &default_governor; - ensure_reclaimer_thread_runs(); -} - -size_t get_num_decrypted_pages() -{ - return num_decrypted_pages.load(); -} -void encryption_note_reader_start(SharedFileInfo& info, const void* reader_id) -{ - UniqueLock lock(mapping_mutex); - ensure_reclaimer_thread_runs(); - auto j = std::find_if(info.readers.begin(), info.readers.end(), [=](auto& reader) { - return reader.reader_ID == reader_id; - }); - if (j == info.readers.end()) { - ReaderInfo i = {reader_id, info.current_version}; - info.readers.push_back(i); - } - else { - j->version = info.current_version; - } - ++info.current_version; -} - -void encryption_note_reader_end(SharedFileInfo& info, const void* reader_id) noexcept -{ - UniqueLock lock(mapping_mutex); - for (auto j = info.readers.begin(); j != info.readers.end(); ++j) - if (j->reader_ID == reader_id) { - // move last over - *j = info.readers.back(); - info.readers.pop_back(); - return; - } -} - -void encryption_mark_pages_for_IV_check(EncryptedFileMapping* mapping) -{ - UniqueLock lock(mapping_mutex); - mapping->mark_pages_for_IV_check(); -} - -namespace { -size_t collect_total_workload() // must be called under lock -{ - size_t total = 0; - for (auto i = mappings_by_file.begin(); i != mappings_by_file.end(); ++i) { - SharedFileInfo& info = *i->info; - info.num_decrypted_pages = 0; - for (auto it = info.mappings.begin(); it != info.mappings.end(); ++it) { - info.num_decrypted_pages += (*it)->collect_decryption_count(); - } - total += info.num_decrypted_pages; - } - return total; -} - -/* Compute the amount of work allowed in an attempt to reclaim pages. - * please refer to EncryptedFileMapping::reclaim_untouched() for more details. - * - * The function starts slowly when the load is 0.5 of target, then turns - * up the volume as the load nears 1.0 - where it sets a work limit of 10%. 
- * Since the work is expressed (roughly) in terms of pages released, this means - * that about 10 runs has to take place to reclaim all pages possible - though - * if successful the load will rapidly decrease, turning down the work limit. - */ - -struct work_limit_desc { - float base; - float effort; -}; -const std::vector control_table = {{0.5f, 0.001f}, {0.75f, 0.002f}, {0.8f, 0.003f}, - {0.85f, 0.005f}, {0.9f, 0.01f}, {0.95f, 0.03f}, - {1.0f, 0.1f}, {1.5f, 0.2f}, {2.0f, 0.3f}}; - -size_t get_work_limit(size_t decrypted_pages, size_t target) -{ - if (target == 0) - target = 1; - float load = 1.0f * decrypted_pages / target; - float akku = 0.0f; - for (const auto& e : control_table) { - if (load <= e.base) +#ifndef _WIN32 + int prot = PROT_READ; + switch (file.access) { + case File::access_ReadWrite: + prot |= PROT_WRITE; break; - akku += (load - e.base) * e.effort; - } - size_t work_limit = size_t(target * akku); - return work_limit; -} - -/* Find the oldest version that is still of interest to somebody */ -uint64_t get_oldest_version(SharedFileInfo& info) // must be called under lock -{ - auto oldest_version = info.current_version; - for (const auto& e : info.readers) { - if (e.version < oldest_version) { - oldest_version = e.version; - } - } - return oldest_version; -} - -// Reclaim pages for ONE file, limited by a given work limit. -void reclaim_pages_for_file(SharedFileInfo& info, size_t& work_limit) -{ - uint64_t oldest_version = get_oldest_version(info); - if (info.last_scanned_version < oldest_version || info.mappings.empty()) { - // locate the mapping matching the progress index. 
No such mapping may - // exist, and if so, we'll update the index to the next mapping - for (auto& e : info.mappings) { - auto start_index = e->get_start_index(); - if (info.progress_index < start_index) { - info.progress_index = start_index; - } - if (info.progress_index <= e->get_end_index()) { - e->reclaim_untouched(info.progress_index, work_limit); - if (work_limit == 0) - return; - } - } - // if we get here, all mappings have been considered - info.progress_index = 0; - info.last_scanned_version = info.current_version; - ++info.current_version; - } -} - -// Reclaim pages from all files, limited by a work limit that is derived -// from a target for the amount of dirty (decrypted) pages. The target is -// set by the governor function. -void reclaim_pages() -{ - size_t load; - util::UniqueFunction runnable; - { - UniqueLock lock(mapping_mutex); - load = collect_total_workload(); - num_decrypted_pages = load; - runnable = governor->current_target_getter(load * page_size()); - } - // callback to governor defined function without mutex held - int64_t target = PageReclaimGovernor::no_match; - if (runnable) { - target = runnable(); - } - { - UniqueLock lock(mapping_mutex); - reclaimer_workload = 0; - reclaimer_target = size_t(target / page_size()); - // Putting the target back into the govenor object will allow the govenor - // to return a getter producing this value again next time it is called - governor->report_target_result(target); - - if (target == PageReclaimGovernor::no_match) // temporarily disabled by governor returning no_match - return; - - if (mappings_by_file.size() == 0) - return; - - size_t work_limit = get_work_limit(load, reclaimer_target); - reclaimer_workload = work_limit; - if (file_reclaim_index >= mappings_by_file.size()) - file_reclaim_index = 0; - - while (work_limit > 0) { - SharedFileInfo& info = *mappings_by_file[file_reclaim_index].info; - reclaim_pages_for_file(info, work_limit); - if (work_limit > 0) { // consider next file: - 
++file_reclaim_index; - if (file_reclaim_index >= mappings_by_file.size()) - return; - } - } - } -} - - -mapping_and_addr* find_mapping_for_addr(void* addr, size_t size) -{ - for (size_t i = 0; i < mappings_by_addr.size(); ++i) { - mapping_and_addr& m = mappings_by_addr[i]; - if (m.addr == addr && m.size == size) - return &m; - REALM_ASSERT(m.addr != addr); - } - - return 0; -} -} // anonymous namespace - -SharedFileInfo* get_file_info_for_file(File& file) -{ - LockGuard lock(mapping_mutex); - File::UniqueID id = file.get_unique_id(); - std::vector::iterator it; - for (it = mappings_by_file.begin(); it != mappings_by_file.end(); ++it) { - if (it->file_unique_id == id) { + case File::access_ReadOnly: break; - } } - if (it == mappings_by_file.end()) - return nullptr; - else - return it->info.get(); -} - -namespace { -EncryptedFileMapping* add_mapping(void* addr, size_t size, const FileAttributes& file, size_t file_offset) -{ - size_t fs = to_size_t(File::get_size_static(file.fd)); - if (fs > 0 && fs < c_min_encrypted_file_size) - throw DecryptionFailed( - util::format("file size %1 is less than the minimum encrypted file size of %2 for '%3'", fs, - c_min_encrypted_file_size, file.path)); - - LockGuard lock(mapping_mutex); - File::UniqueID fuid = File::get_unique_id(file.fd, file.path); - - std::vector::iterator it; - for (it = mappings_by_file.begin(); it != mappings_by_file.end(); ++it) { - if (it->file_unique_id == fuid) { - break; - } - } - - // Get the potential memory allocation out of the way so that mappings_by_addr.push_back can't throw - mappings_by_addr.reserve(mappings_by_addr.size() + 1); - - if (it == mappings_by_file.end()) { - mappings_by_file.reserve(mappings_by_file.size() + 1); - mappings_for_file f; - f.info = std::make_shared(reinterpret_cast(file.encryption_key)); - f.info->fd = File::dup_file_desc(file.fd); - f.file_unique_id = fuid; + void* addr = ::mmap(nullptr, size, prot, MAP_SHARED, file.fd, offset); + if (addr != MAP_FAILED) + return addr; 
- mappings_by_file.push_back(f); // can't throw due to reserve() above - it = mappings_by_file.end() - 1; - } - else { - it->info->cryptor.check_key(reinterpret_cast(file.encryption_key)); + int err = errno; // Eliminate any risk of clobbering + if (is_mmap_memory_error(err)) { + throw AddressSpaceExhausted(util::format("mmap() failed: %1 (size: %2, offset: %3)", + make_basic_system_error_code(err).message(), size, offset)); } - try { - mapping_and_addr m; - m.addr = addr; - m.size = size; - m.mapping = std::make_shared(*it->info, file_offset, addr, size, file.access); - mappings_by_addr.push_back(m); // can't throw due to reserve() above - return m.mapping.get(); - } - catch (...) { - if (it->info->mappings.empty()) { - FileDesc fd_to_close = it->info->fd; - mappings_by_file.erase(it); - File::close_static(fd_to_close); // Throws - } - throw; - } -} + throw SystemError(err, util::format("mmap() failed (size: %1, offset: %2", size, offset)); -void remove_mapping(void* addr, size_t size) -{ - size = round_up_to_page_size(size); - LockGuard lock(mapping_mutex); - mapping_and_addr* m = find_mapping_for_addr(addr, size); - if (!m) - return; - - mappings_by_addr.erase(mappings_by_addr.begin() + (m - &mappings_by_addr[0])); - - for (std::vector::iterator it = mappings_by_file.begin(); it != mappings_by_file.end(); ++it) { - if (it->info->mappings.empty()) { - FileDesc fd_to_close = it->info->fd; - mappings_by_file.erase(it); - File::close_static(fd_to_close); // Throws +#else + DWORD protect = PAGE_READONLY; + DWORD desired_access = FILE_MAP_READ; + switch (file.access) { + case File::access_ReadOnly: + break; + case File::access_ReadWrite: + protect = PAGE_READWRITE; + desired_access = FILE_MAP_WRITE; break; - } - } -} -} // anonymous namespace - -void* mmap(const FileAttributes& file, size_t size, size_t offset, EncryptedFileMapping*& mapping) -{ - _impl::SimulatedFailure::trigger_mmap(size); - if (file.encryption_key) { - size = round_up_to_page_size(size); - void* 
addr = mmap_anon(size); - mapping = add_mapping(addr, size, file, offset); - return addr; - } - else { - mapping = nullptr; - return mmap(file, size, offset); } -} - + LARGE_INTEGER large_int; + if (int_cast_with_overflow_detect(offset + size, large_int.QuadPart)) + throw std::runtime_error("Map size is too large"); + HANDLE map_handle = CreateFileMappingFromApp(file.fd, 0, protect, offset + size, nullptr); + if (!map_handle) + throw AddressSpaceExhausted(get_errno_msg("CreateFileMapping() failed: ", GetLastError()) + + " size: " + util::to_string(size) + " offset: " + util::to_string(offset)); + + if (int_cast_with_overflow_detect(offset, large_int.QuadPart)) + throw RuntimeError(ErrorCodes::RangeError, "Map offset is too large"); + + SIZE_T _size = size; + void* addr = MapViewOfFileFromApp(map_handle, desired_access, offset, _size); + BOOL r = CloseHandle(map_handle); + REALM_ASSERT_RELEASE(r); + if (!addr) + throw AddressSpaceExhausted(get_errno_msg("MapViewOfFileFromApp() failed: ", GetLastError()) + + " size: " + util::to_string(_size) + " offset: " + util::to_string(offset)); -EncryptedFileMapping* reserve_mapping(void* addr, const FileAttributes& file, size_t offset) -{ - return add_mapping(addr, 0, file, offset); -} - -void extend_encrypted_mapping(EncryptedFileMapping* mapping, void* addr, size_t offset, size_t old_size, - size_t new_size) -{ - LockGuard lock(mapping_mutex); - auto m = find_mapping_for_addr(addr, old_size); - REALM_ASSERT(m); - m->size = new_size; - mapping->extend_to(offset, new_size); -} - -void remove_encrypted_mapping(void* addr, size_t size) -{ - remove_mapping(addr, size); -} - -void* mmap_reserve(const FileAttributes& file, size_t reservation_size, size_t offset_in_file, - EncryptedFileMapping*& mapping) -{ - auto addr = mmap_reserve(file.fd, reservation_size, offset_in_file); - if (file.encryption_key) { - REALM_ASSERT(reservation_size == round_up_to_page_size(reservation_size)); - // we create a mapping for the entire reserved 
area. This causes full initialization of some fairly - // large std::vectors, which it would be nice to avoid. This is left as a future optimization. - mapping = add_mapping(addr, reservation_size, file, offset_in_file); - } - else { - mapping = nullptr; - } return addr; +#endif } -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key, EncryptedFileMapping* encrypted_mapping) +#if REALM_ENABLE_ENCRYPTION +std::unique_ptr reserve_mapping(void* addr, const FileAttributes& file, uint64_t offset) { - REALM_ASSERT((enc_key == nullptr) == - (encrypted_mapping == nullptr)); // Mapping must already have been set if encryption is used - if (encrypted_mapping) { -// Since the encryption layer must be able to WRITE into the memory area, -// we have to map it read/write regardless of the request. -// FIXME: Make this work for windows! -#ifdef _WIN32 - return nullptr; -#else - return ::mmap(address_request, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE | MAP_FIXED, -1, 0); -#endif - } - else { - return mmap_fixed(fd, address_request, size, access, offset, enc_key); - } + return file.encryption->add_mapping(offset, addr, 0, file.access); } - #endif // REALM_ENABLE_ENCRYPTION void* mmap_anon(size_t size) @@ -665,15 +189,10 @@ void* mmap_anon(size_t size) #endif } -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key) +#ifndef _WIN32 +void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, uint64_t offset) { _impl::SimulatedFailure::trigger_mmap(size); - static_cast(enc_key); // FIXME: Consider removing this parameter -#ifdef _WIN32 - REALM_ASSERT(false); - return nullptr; // silence warning -#else auto prot = PROT_READ; if (access == File::access_ReadWrite) prot |= PROT_WRITE; @@ -683,111 +202,15 @@ void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMo ", when 
mapping an already reserved memory area"); } return addr; -#endif -} - -void* mmap_reserve(FileDesc fd, size_t reservation_size, size_t offset_in_file) -{ - // The other mmap operations take an fd as a parameter, so we do too. - // We're not using it for anything currently, but this may change. - // Similarly for offset_in_file. - static_cast(fd); - static_cast(offset_in_file); -#ifdef _WIN32 - REALM_ASSERT(false); // unsupported on windows - return nullptr; -#else - auto addr = ::mmap(0, reservation_size, PROT_NONE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) { - throw std::runtime_error(get_errno_msg("mmap() failed: ", errno)); - } - return addr; -#endif } +#endif // _WIN32 -void* mmap(const FileAttributes& file, size_t size, size_t offset) -{ - _impl::SimulatedFailure::trigger_mmap(size); -#if REALM_ENABLE_ENCRYPTION - if (file.encryption_key) { - size = round_up_to_page_size(size); - void* addr = mmap_anon(size); - add_mapping(addr, size, file, offset); - return addr; - } - else -#else - REALM_ASSERT(!file.encryption_key); -#endif - { - -#ifndef _WIN32 - int prot = PROT_READ; - switch (file.access) { - case File::access_ReadWrite: - prot |= PROT_WRITE; - break; - case File::access_ReadOnly: - break; - } - - void* addr = ::mmap(nullptr, size, prot, MAP_SHARED, file.fd, offset); - if (addr != MAP_FAILED) - return addr; - - int err = errno; // Eliminate any risk of clobbering - if (is_mmap_memory_error(err)) { - throw AddressSpaceExhausted(get_errno_msg("mmap() failed: ", err) + " size: " + util::to_string(size) + - " offset: " + util::to_string(offset)); - } - - throw SystemError(err, std::string("mmap() failed (size: ") + util::to_string(size) + - ", offset: " + util::to_string(offset)); - -#else - // FIXME: Is there anything that we must do on Windows to honor map_NoSync? 
- - DWORD protect = PAGE_READONLY; - DWORD desired_access = FILE_MAP_READ; - switch (file.access) { - case File::access_ReadOnly: - break; - case File::access_ReadWrite: - protect = PAGE_READWRITE; - desired_access = FILE_MAP_WRITE; - break; - } - LARGE_INTEGER large_int; - if (int_cast_with_overflow_detect(offset + size, large_int.QuadPart)) - throw std::runtime_error("Map size is too large"); - HANDLE map_handle = CreateFileMappingFromApp(file.fd, 0, protect, offset + size, nullptr); - if (!map_handle) - throw AddressSpaceExhausted(get_errno_msg("CreateFileMapping() failed: ", GetLastError()) + - " size: " + util::to_string(size) + " offset: " + util::to_string(offset)); - - if (int_cast_with_overflow_detect(offset, large_int.QuadPart)) - throw RuntimeError(ErrorCodes::RangeError, "Map offset is too large"); - - SIZE_T _size = size; - void* addr = MapViewOfFileFromApp(map_handle, desired_access, offset, _size); - BOOL r = CloseHandle(map_handle); - REALM_ASSERT_RELEASE(r); - if (!addr) - throw AddressSpaceExhausted(get_errno_msg("MapViewOfFileFromApp() failed: ", GetLastError()) + - " size: " + util::to_string(_size) + " offset: " + util::to_string(offset)); - - return addr; -#endif - } -} - void munmap(void* addr, size_t size) { -#if REALM_ENABLE_ENCRYPTION - remove_mapping(addr, size); -#endif - + auto shift = reinterpret_cast(addr) & (page_size() - 1); + addr = static_cast(addr) - shift; + size += shift; #ifdef _WIN32 if (!UnmapViewOfFile(addr)) throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile() failed"); @@ -800,102 +223,8 @@ void munmap(void* addr, size_t size) #endif } -void* mremap(const FileAttributes& file, size_t file_offset, void* old_addr, size_t old_size, size_t new_size) -{ -#if REALM_ENABLE_ENCRYPTION - if (file.encryption_key) { - LockGuard lock(mapping_mutex); - size_t rounded_old_size = round_up_to_page_size(old_size); - if (mapping_and_addr* m = find_mapping_for_addr(old_addr, rounded_old_size)) { - size_t 
rounded_new_size = round_up_to_page_size(new_size); - if (rounded_old_size == rounded_new_size) - return old_addr; - - void* new_addr = mmap_anon(rounded_new_size); - m->mapping->set(new_addr, rounded_new_size, file_offset); - m->addr = new_addr; - m->size = rounded_new_size; -#ifdef _WIN32 - if (!UnmapViewOfFile(old_addr)) - throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile() failed"); -#else - if (::munmap(old_addr, rounded_old_size)) { - int err = errno; - throw std::system_error(err, std::system_category(), "munmap() failed"); - } -#endif - return new_addr; - } - // If we are using encryption, we must have used mmap and the mapping - // must have been added to the cache therefore find_mapping_for_addr() - // will succeed. Otherwise we would continue to mmap it below without - // the encryption key which is an error. - REALM_UNREACHABLE(); - } -#endif - -#ifdef _GNU_SOURCE - { - void* new_addr = ::mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); - if (new_addr != MAP_FAILED) - return new_addr; - int err = errno; // Eliminate any risk of clobbering - // Do not throw here if mremap is declared as "not supported" by the - // platform Eg. When compiling with GNU libc on OSX, iOS. - // In this case fall through to no-mremap case below. 
- if (err != ENOTSUP && err != ENOSYS) { - if (is_mmap_memory_error(err)) { - throw AddressSpaceExhausted(get_errno_msg("mremap() failed: ", err) + " old size: " + - util::to_string(old_size) + " new size: " + util::to_string(new_size)); - } - throw std::system_error(err, std::system_category(), - std::string("_gnu_src mmap() failed (") + "old_size: " + - util::to_string(old_size) + ", new_size: " + util::to_string(new_size) + ")"); - } - } -#endif - - void* new_addr = mmap(file, new_size, file_offset); - -#ifdef _WIN32 - if (!UnmapViewOfFile(old_addr)) - throw std::system_error(GetLastError(), std::system_category(), "UnmapViewOfFile() failed"); -#else - if (::munmap(old_addr, old_size) != 0) { - int err = errno; - throw std::system_error(err, std::system_category(), "munmap() failed"); - } -#endif - - return new_addr; -} - void msync(FileDesc fd, void* addr, size_t size) { -#if REALM_ENABLE_ENCRYPTION - { - // first check the encrypted mappings - LockGuard lock(mapping_mutex); - if (mapping_and_addr* m = find_mapping_for_addr(addr, round_up_to_page_size(size))) { - m->mapping->flush(); - m->mapping->sync(); - return; - } - } -#endif - - // not an encrypted mapping - - // FIXME: on iOS/OSX fsync may not be enough to ensure crash safety. - // Consider adding fcntl(F_FULLFSYNC). This most likely also applies to msync. - // - // See description of fsync on iOS here: - // https://developer.apple.com/library/ios/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fsync.2.html - // - // See also - // https://developer.apple.com/library/ios/documentation/Cocoa/Conceptual/CoreData/Articles/cdPersistentStores.html - // for a discussion of this related to core data. 
- #ifdef _WIN32 // FlushViewOfFile() is asynchronous and won't flush metadata (file size, etc) if (!FlushViewOfFile(addr, size)) { @@ -918,5 +247,17 @@ void msync(FileDesc fd, void* addr, size_t size) } #endif } -} // namespace util -} // namespace realm + +#if REALM_ENABLE_ENCRYPTION +void do_encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping, bool to_modify) +{ + mapping->read_barrier(addr, size, to_modify); +} + +void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) +{ + mapping->write_barrier(addr, size); +} +#endif // REALM_ENABLE_ENCRYPTION + +} // namespace realm::util diff --git a/src/realm/util/file_mapper.hpp b/src/realm/util/file_mapper.hpp index d4cd667823d..9842a986a55 100644 --- a/src/realm/util/file_mapper.hpp +++ b/src/realm/util/file_mapper.hpp @@ -21,169 +21,62 @@ #include #include -#include -#include -#include - -#include -#include - -namespace realm { -namespace util { +namespace realm::util { struct FileAttributes { FileDesc fd; - std::string path; File::AccessMode access; - const char* encryption_key = nullptr; + EncryptedFile* encryption; }; -void* mmap(const FileAttributes& file, size_t size, size_t offset); -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key); -void* mmap_reserve(FileDesc fd, size_t size, size_t offset); +class EncryptedFileMapping; + +void* mmap(const FileAttributes& file, size_t size, uint64_t offset, std::unique_ptr& mapping); +void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, uint64_t offset); void munmap(void* addr, size_t size); -void* mremap(const FileAttributes& file, size_t file_offset, void* old_addr, size_t old_size, size_t new_size); void msync(FileDesc fd, void* addr, size_t size); void* mmap_anon(size_t size); -// A function which may be given to encryption_read_barrier. 
If present, the read barrier is a -// a barrier for a full array. If absent, the read barrier is a barrier only for the address -// range give as argument. If the barrier is for a full array, it will read the array header -// and determine the address range from the header. -using HeaderToSize = size_t (*)(const char* addr); -class EncryptedFileMapping; - -class PageReclaimGovernor { -public: - // Called by the page reclaimer with the current load (in bytes) and - // must return the target load (also in bytes). Returns no_match if no - // target can be set - static constexpr int64_t no_match = -1; - virtual util::UniqueFunction current_target_getter(size_t load) = 0; - virtual void report_target_result(int64_t) = 0; -}; - -// Set a page reclaim governor. The governor is an object with a method which will be called periodically -// and must return a 'target' amount of memory to hold decrypted pages. The page reclaim daemon -// will then try to release pages to meet the target. The governor is called with the current -// amount of data used, for the purpose of logging - or possibly for computing the target -// -// The governor is called approximately once per second. -// -// If no governor is installed, the page reclaim daemon will not start. -void set_page_reclaim_governor(PageReclaimGovernor* governor); - -// Use the default governor. The default governor is used automatically if nothing else is set, so -// this funciton is mostly useful for tests where changing back to the default could be desirable. -inline void set_page_reclaim_governor_to_default() -{ - set_page_reclaim_governor(nullptr); -} - -// Retrieves the number of in memory decrypted pages, across all open files. 
-size_t get_num_decrypted_pages(); - #if REALM_ENABLE_ENCRYPTION -void encryption_note_reader_start(SharedFileInfo& info, const void* reader_id); -void encryption_note_reader_end(SharedFileInfo& info, const void* reader_id) noexcept; - -SharedFileInfo* get_file_info_for_file(File& file); +void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, uint64_t offset); -// This variant allows the caller to obtain direct access to the encrypted file mapping -// for optimization purposes. -void* mmap(const FileAttributes& file, size_t size, size_t offset, EncryptedFileMapping*& mapping); -void* mmap_fixed(FileDesc fd, void* address_request, size_t size, File::AccessMode access, size_t offset, - const char* enc_key, EncryptedFileMapping* mapping); +std::unique_ptr reserve_mapping(void* addr, const FileAttributes& file, uint64_t offset); -void* mmap_reserve(const FileAttributes& file, size_t size, size_t offset, EncryptedFileMapping*& mapping); +void do_encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping, bool to_modify); +void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping); -EncryptedFileMapping* reserve_mapping(void* addr, const FileAttributes& file, size_t offset); +#else -void extend_encrypted_mapping(EncryptedFileMapping* mapping, void* addr, size_t offset, size_t old_size, - size_t new_size); -void remove_encrypted_mapping(void* addr, size_t size); -void do_encryption_read_barrier(const void* addr, size_t size, HeaderToSize header_to_size, - EncryptedFileMapping* mapping, bool to_modify); +inline void do_encryption_read_barrier(const void*, size_t, EncryptedFileMapping*, bool) {} +inline void do_encryption_write_barrier(const void*, size_t, EncryptedFileMapping*) {} -void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping); +#endif -void inline encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* 
mapping, - HeaderToSize header_to_size = nullptr, bool to_modify = false) +inline void encryption_read_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) { if (REALM_UNLIKELY(mapping)) - do_encryption_read_barrier(addr, size, header_to_size, mapping, to_modify); + do_encryption_read_barrier(addr, size, mapping, false); } -void inline encryption_read_barrier_for_write(const void* addr, size_t size, EncryptedFileMapping* mapping) +inline void encryption_read_barrier_for_write(const void* addr, size_t size, EncryptedFileMapping* mapping) { if (REALM_UNLIKELY(mapping)) - do_encryption_read_barrier(addr, size, nullptr, mapping, true); + do_encryption_read_barrier(addr, size, mapping, true); } -void inline encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) +inline void encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) { if (REALM_UNLIKELY(mapping)) do_encryption_write_barrier(addr, size, mapping); } - -extern util::Mutex& mapping_mutex; - -void inline encryption_flush(EncryptedFileMapping* mapping) -{ - UniqueLock lock(mapping_mutex); - mapping->flush(); -} - -inline void do_encryption_read_barrier(const void* addr, size_t size, HeaderToSize header_to_size, - EncryptedFileMapping* mapping, bool to_modify) -{ - UniqueLock lock(mapping_mutex); - mapping->read_barrier(addr, size, header_to_size, to_modify); -} - -inline void do_encryption_write_barrier(const void* addr, size_t size, EncryptedFileMapping* mapping) -{ - LockGuard lock(mapping_mutex); - mapping->write_barrier(addr, size); -} - -#else - - -size_t inline get_num_decrypted_pages() -{ - return 0; -} - -void inline set_page_reclaim_governor(PageReclaimGovernor*) {} -void inline encryption_read_barrier(const void*, size_t, EncryptedFileMapping*, HeaderToSize = nullptr) {} -void inline encryption_read_barrier_for_write(const void*, size_t, EncryptedFileMapping*) {} -void inline encryption_write_barrier(const void*, size_t) {} 
-void inline encryption_write_barrier(const void*, size_t, EncryptedFileMapping*) {} -void inline do_encryption_read_barrier(const void*, size_t, HeaderToSize, EncryptedFileMapping*, bool) {} -void inline do_encryption_write_barrier(const void*, size_t, EncryptedFileMapping*) {} - -#endif - // helpers for encrypted Maps template void encryption_read_barrier(const File::Map& map, size_t index, size_t num_elements = 1) { if (auto mapping = map.get_encrypted_mapping(); REALM_UNLIKELY(mapping)) { - do_encryption_read_barrier(map.get_addr() + index, sizeof(T) * num_elements, nullptr, mapping, - map.is_writeable()); - } -} - -template -void encryption_read_barrier_for_write(const File::Map& map, size_t index, size_t num_elements = 1) -{ - if (auto mapping = map.get_encrypted_mapping(); REALM_UNLIKELY(mapping)) { - do_encryption_read_barrier(map.get_addr() + index, sizeof(T) * num_elements, nullptr, mapping, - map.is_writeable()); + do_encryption_read_barrier(map.get_addr() + index, sizeof(T) * num_elements, mapping, map.is_writeable()); } } @@ -194,13 +87,10 @@ void encryption_write_barrier(const File::Map& map, size_t index, size_t num_ do_encryption_write_barrier(map.get_addr() + index, sizeof(T) * num_elements, mapping); } } -void encryption_mark_pages_for_IV_check(EncryptedFileMapping* mapping); File::SizeType encrypted_size_to_data_size(File::SizeType size) noexcept; File::SizeType data_size_to_encrypted_size(File::SizeType size) noexcept; size_t round_up_to_page_size(size_t size) noexcept; - -} // namespace util -} // namespace realm +} // namespace realm::util #endif diff --git a/src/realm/util/load_file.cpp b/src/realm/util/load_file.cpp index 5eb79a444fe..8c0d2d9a36c 100644 --- a/src/realm/util/load_file.cpp +++ b/src/realm/util/load_file.cpp @@ -13,19 +13,10 @@ std::string util::load_file(const std::string& path) for (;;) { std::size_t min_extra_capacity = 256; buffer.reserve_extra(used_size, min_extra_capacity); // Throws - std::size_t n = 
file.read(buffer.data() + used_size, buffer.size() - used_size); // Throws + std::size_t n = file.read(used_size, buffer.data() + used_size, buffer.size() - used_size); // Throws if (n == 0) break; used_size += n; } return std::string(buffer.data(), used_size); // Throws } - - -std::string util::load_file_and_chomp(const std::string& path) -{ - std::string contents = load_file(path); // Throws - if (!contents.empty() && contents.back() == '\n') - contents.pop_back(); - return contents; -} diff --git a/src/realm/util/load_file.hpp b/src/realm/util/load_file.hpp index c12613a5d07..fe41bec980a 100644 --- a/src/realm/util/load_file.hpp +++ b/src/realm/util/load_file.hpp @@ -9,7 +9,6 @@ namespace util { // FIXME: These functions ought to be moved to in the // realm-core repository. std::string load_file(const std::string& path); -std::string load_file_and_chomp(const std::string& path); } // namespace util } // namespace realm diff --git a/src/realm/util/safe_int_ops.hpp b/src/realm/util/safe_int_ops.hpp index 659249716fc..f92af37b18a 100644 --- a/src/realm/util/safe_int_ops.hpp +++ b/src/realm/util/safe_int_ops.hpp @@ -25,11 +25,11 @@ #include #endif -#include - #include #include +#include + namespace realm { namespace util { @@ -55,17 +55,17 @@ namespace util { /// integers. 
template -inline bool int_equal_to(A, B) noexcept; +constexpr bool int_equal_to(A, B) noexcept; template -inline bool int_not_equal_to(A, B) noexcept; +constexpr bool int_not_equal_to(A, B) noexcept; template -inline bool int_less_than(A, B) noexcept; +constexpr bool int_less_than(A, B) noexcept; template -inline bool int_less_than_or_equal(A, B) noexcept; +constexpr bool int_less_than_or_equal(A, B) noexcept; template -inline bool int_greater_than(A, B) noexcept; +constexpr bool int_greater_than(A, B) noexcept; template -inline bool int_greater_than_or_equal(A, B) noexcept; +constexpr bool int_greater_than_or_equal(A, B) noexcept; //@} @@ -89,10 +89,10 @@ inline bool int_greater_than_or_equal(A, B) noexcept; /// integers. template -inline bool int_add_with_overflow_detect(L& lval, R rval) noexcept; +constexpr bool int_add_with_overflow_detect(L& lval, R rval) noexcept; template -inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept; +constexpr bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept; //@} @@ -113,7 +113,7 @@ inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept; /// specializations of std::numeric_limits<> and that both are indeed /// integers. template -inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept; +constexpr bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept; /// Checks for positive overflow when performing a bitwise shift to @@ -128,7 +128,7 @@ inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept; /// value of i must not exceed the number of bits of storage type T as /// shifting by this amount is not defined by the standard. template -inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept; +constexpr bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept; //@{ @@ -146,10 +146,10 @@ inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept; /// except that it complies with at least C++03. 
template -bool int_cast_has_overflow(From from) noexcept; +constexpr bool int_cast_has_overflow(From from) noexcept; template -bool int_cast_with_overflow_detect(From from, To& to) noexcept; +constexpr bool int_cast_with_overflow_detect(From from, To& to) noexcept; //@} @@ -164,11 +164,11 @@ struct SafeIntBinopsImpl; template struct SafeIntBinopsImpl == std::is_signed_v>> { using common = std::common_type_t; - static bool equal(L l, R r) noexcept + constexpr static bool equal(L l, R r) noexcept { return common(l) == common(r); } - static bool less(L l, R r) noexcept + constexpr static bool less(L l, R r) noexcept { return common(l) < common(r); } @@ -179,11 +179,11 @@ template struct SafeIntBinopsImpl && std::is_signed_v>> { using lim_l = std::numeric_limits; using lim_r = std::numeric_limits; - static bool equal(L l, R r) noexcept + constexpr static bool equal(L l, R r) noexcept { return (lim_l::digits > lim_r::digits) ? r >= 0 && l == L(r) : R(l) == r; } - static bool less(L l, R r) noexcept + constexpr static bool less(L l, R r) noexcept { return (lim_l::digits > lim_r::digits) ? 
r >= 0 && l < L(r) : R(l) < r; } @@ -192,12 +192,12 @@ struct SafeIntBinopsImpl && std::is_ // (signed, unsigned) (all size combinations) template struct SafeIntBinopsImpl && !std::is_signed_v>> { - static bool equal(L l, R r) noexcept + constexpr static bool equal(L l, R r) noexcept { // r == l return SafeIntBinopsImpl::equal(r, l); } - static bool less(L l, R r) noexcept + constexpr static bool less(L l, R r) noexcept { // !(r == l || r < l) return !(SafeIntBinopsImpl::equal(r, l) || SafeIntBinopsImpl::less(r, l)); @@ -218,43 +218,43 @@ struct SafeIntBinops : SafeIntBinopsImpl { namespace util { template -inline bool int_equal_to(A a, B b) noexcept +constexpr bool int_equal_to(A a, B b) noexcept { return realm::_impl::SafeIntBinops::equal(a, b); } template -inline bool int_not_equal_to(A a, B b) noexcept +constexpr bool int_not_equal_to(A a, B b) noexcept { return !realm::_impl::SafeIntBinops::equal(a, b); } template -inline bool int_less_than(A a, B b) noexcept +constexpr bool int_less_than(A a, B b) noexcept { return realm::_impl::SafeIntBinops::less(a, b); } template -inline bool int_less_than_or_equal(A a, B b) noexcept +constexpr bool int_less_than_or_equal(A a, B b) noexcept { return !realm::_impl::SafeIntBinops::less(b, a); // Not greater than } template -inline bool int_greater_than(A a, B b) noexcept +constexpr bool int_greater_than(A a, B b) noexcept { return realm::_impl::SafeIntBinops::less(b, a); } template -inline bool int_greater_than_or_equal(A a, B b) noexcept +constexpr bool int_greater_than_or_equal(A a, B b) noexcept { return !realm::_impl::SafeIntBinops::less(a, b); // Not less than } template -inline bool int_add_with_overflow_detect(L& lval, R rval) noexcept +constexpr bool int_add_with_overflow_detect(L& lval, R rval) noexcept { // Note: MSVC returns true on success, while gcc/clang return true on overflow. // Note: Both may write to destination on overflow, but our tests check that this doesn't happen. 
@@ -270,7 +270,7 @@ inline bool int_add_with_overflow_detect(L& lval, R rval) noexcept } template -inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept +constexpr bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept { auto old = lval; #ifdef _MSC_VER @@ -284,7 +284,7 @@ inline bool int_subtract_with_overflow_detect(L& lval, R rval) noexcept } template -inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept +constexpr bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept { auto old = lval; #ifdef _MSC_VER @@ -298,7 +298,7 @@ inline bool int_multiply_with_overflow_detect(L& lval, R rval) noexcept } template -inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept +constexpr bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept { typedef std::numeric_limits lim; static_assert(lim::is_specialized, "std::numeric_limits<> must be specialized for T"); @@ -311,14 +311,14 @@ inline bool int_shift_left_with_overflow_detect(T& lval, int i) noexcept } template -inline bool int_cast_has_overflow(From from) noexcept +constexpr bool int_cast_has_overflow(From from) noexcept { typedef std::numeric_limits lim_to; return int_less_than(from, lim_to::min()) || int_less_than(lim_to::max(), from); } template -inline bool int_cast_with_overflow_detect(From from, To& to) noexcept +constexpr bool int_cast_with_overflow_detect(From from, To& to) noexcept { if (REALM_LIKELY(!int_cast_has_overflow(from))) { to = To(from); diff --git a/src/realm/utilities.hpp b/src/realm/utilities.hpp index fc3a9c5bd1a..2125fe2c2fa 100644 --- a/src/realm/utilities.hpp +++ b/src/realm/utilities.hpp @@ -121,10 +121,6 @@ REALM_FORCEINLINE bool sseavx() } void cpuid_init(); -void* round_up(void* p, size_t align); -void* round_down(void* p, size_t align); -constexpr size_t round_up(size_t p, size_t align); -constexpr size_t round_down(size_t p, size_t align); void millisleep(unsigned long milliseconds); #ifdef _WIN32 @@ -334,25 
+330,25 @@ inline char toLowerAscii(char c) return c; } -inline void* round_up(void* p, size_t align) +inline void* round_up(void* p, uintptr_t align) { - size_t r = size_t(p) % align == 0 ? 0 : align - size_t(p) % align; + uintptr_t r = uintptr_t(p) % align == 0 ? 0 : align - uintptr_t(p) % align; return static_cast(p) + r; } -inline void* round_down(void* p, size_t align) +inline void* round_down(void* p, uintptr_t align) { - size_t r = size_t(p); + uintptr_t r = uintptr_t(p); return reinterpret_cast(r & ~(align - 1)); } -constexpr inline size_t round_up(size_t p, size_t align) +constexpr size_t round_up(size_t p, size_t align) { size_t r = p % align == 0 ? 0 : align - p % align; return p + r; } -constexpr inline size_t round_down(size_t p, size_t align) +constexpr size_t round_down(size_t p, size_t align) { size_t r = p; return r & (~(align - 1)); diff --git a/test/fuzz_group.cpp b/test/fuzz_group.cpp index eb423ae2b1a..f4c4e3c3c26 100644 --- a/test/fuzz_group.cpp +++ b/test/fuzz_group.cpp @@ -19,6 +19,7 @@ #include "fuzz_group.hpp" #include +#include #include #include diff --git a/test/object-store/sync/client_reset.cpp b/test/object-store/sync/client_reset.cpp index 9014d8f8e35..e75a61d5bcb 100644 --- a/test/object-store/sync/client_reset.cpp +++ b/test/object-store/sync/client_reset.cpp @@ -1099,10 +1099,10 @@ TEST_CASE("sync: client reset", "[sync][pbs][client reset][baas]") { err = error; }; std::string fresh_path = realm::_impl::client_reset::get_fresh_path_for(local_config.path); - util::File f(fresh_path, util::File::Mode::mode_Write); - f.write("a non empty file"); - f.sync(); - f.close(); + { + util::File f(fresh_path, util::File::Mode::mode_Write); + f.write(0, "a non empty file"); + } make_reset(local_config, remote_config)->run(); REQUIRE(!err); diff --git a/test/object-store/sync/metadata.cpp b/test/object-store/sync/metadata.cpp index ccfe3ed18a1..68a42c2b839 100644 --- a/test/object-store/sync/metadata.cpp +++ 
b/test/object-store/sync/metadata.cpp @@ -75,7 +75,6 @@ bool can_access_keychain() }(); return can_access_keychain; } -#endif CFPtr build_search_dictionary(CFStringRef account, CFStringRef service) { @@ -117,6 +116,7 @@ std::vector generate_key() arc4random_buf(key.data(), key.size()); return key; } +#endif // REALM_ENABLE_ENCRYPTION #endif // REALM_PLATFORM_APPLE } // anonymous namespace diff --git a/test/object-store/sync/sync_manager.cpp b/test/object-store/sync/sync_manager.cpp index d3dcd1cac33..5805ab40560 100644 --- a/test/object-store/sync/sync_manager.cpp +++ b/test/object-store/sync/sync_manager.cpp @@ -146,9 +146,7 @@ TEST_CASE("App: path_for_realm API", "[sync][app][file]") { SyncConfig config(user, SyncConfig::FLXSyncEnabled{}); std::string path = app->path_for_realm(config, util::make_optional("custom.realm")); realm::test_util::TestPathGuard guard(path); - realm::util::File existing_realm_file(path, File::mode_Write); - existing_realm_file.write(std::string("test")); - existing_realm_file.sync(); + realm::util::File(path, File::mode_Write).write(0, "test"); REQUIRE(app->path_for_realm(config, util::make_optional("custom.realm")) == base_path / "custom.realm"); } diff --git a/test/object-store/thread_safe_reference.cpp b/test/object-store/thread_safe_reference.cpp index 1e032a29c39..232db8cf9e8 100644 --- a/test/object-store/thread_safe_reference.cpp +++ b/test/object-store/thread_safe_reference.cpp @@ -72,6 +72,7 @@ TEST_CASE("thread safe reference") { config.automatic_change_notifications = false; config.schema = schema; config.in_memory = true; + config.encryption_key.clear(); auto r = Realm::get_shared_realm(config); const auto int_obj_col = r->schema().find("int object")->persisted_properties[0].column_key; diff --git a/test/object-store/util/sync/baas_admin_api.cpp b/test/object-store/util/sync/baas_admin_api.cpp index 9b7c6f3ffe6..4b0f5546e8a 100644 --- a/test/object-store/util/sync/baas_admin_api.cpp +++ 
b/test/object-store/util/sync/baas_admin_api.cpp @@ -419,8 +419,8 @@ class Baasaas { baas_coid)); } logger->info("Baasaas container started with id \"%1\"", m_container_id); - auto lock_file = util::File(std::string{s_baasaas_lock_file_name}, util::File::mode_Write); - lock_file.write(m_container_id); + util::File lock_file(s_baasaas_lock_file_name, util::File::mode_Write); + lock_file.write(0, m_container_id); } explicit Baasaas(std::string api_key, std::string baasaas_instance_id) diff --git a/test/realm-fuzzer/fuzz_configurator.cpp b/test/realm-fuzzer/fuzz_configurator.cpp index b3dd1a1ac76..461fa9f2abb 100644 --- a/test/realm-fuzzer/fuzz_configurator.cpp +++ b/test/realm-fuzzer/fuzz_configurator.cpp @@ -15,9 +15,13 @@ * limitations under the License. * **************************************************************************/ + #include "fuzz_configurator.hpp" + #include "fuzz_object.hpp" #include "../util/test_path.hpp" + +#include #include FuzzConfigurator::FuzzConfigurator(FuzzObject& fuzzer, const std::string& input, bool use_input_file, @@ -109,4 +113,4 @@ void FuzzConfigurator::print_cnf() !m_use_encryption ? "nullptr" : std::string("\"") + m_config.encryption_key.data() + "\""; m_log << "// const char* key = " << printable_key << ";\n"; m_log << "\n"; -} \ No newline at end of file +} diff --git a/test/realm-fuzzer/fuzz_configurator.hpp b/test/realm-fuzzer/fuzz_configurator.hpp index 33c3203a181..904ce828db9 100644 --- a/test/realm-fuzzer/fuzz_configurator.hpp +++ b/test/realm-fuzzer/fuzz_configurator.hpp @@ -15,12 +15,15 @@ * limitations under the License. 
* **************************************************************************/ + #ifndef FUZZ_CONFIG_HPP #define FUZZ_CONFIG_HPP #include "util.hpp" #include "fuzz_logger.hpp" + #include + #include #include @@ -49,4 +52,4 @@ class FuzzConfigurator { State m_state; std::string m_fuzz_name; }; -#endif \ No newline at end of file +#endif diff --git a/test/test_all.cpp b/test/test_all.cpp index eeae889332f..17d71f593ed 100644 --- a/test/test_all.cpp +++ b/test/test_all.cpp @@ -200,19 +200,6 @@ void set_random_seed() random_seed(unit_test_random_seed); } -class AggressiveGovernor : public util::PageReclaimGovernor { -public: - util::UniqueFunction current_target_getter(size_t) override - { - return []() { - return 4096; - }; - } - void report_target_result(int64_t) override {} -}; - -AggressiveGovernor aggressive_governor; - void set_always_encrypt() { if (const char* env = getenv("UNITTEST_ENCRYPT_ALL")) { @@ -222,8 +209,6 @@ void set_always_encrypt() } if (str == "1" || str == "on" || str == "yes") { enable_always_encrypt(); - // ask for a very aggressive page reclaimer to maximize chance of triggering a bug. 
- realm::util::set_page_reclaim_governor(&aggressive_governor); } } } diff --git a/test/test_alloc.cpp b/test/test_alloc.cpp index 546874be6b1..56382fb444d 100644 --- a/test/test_alloc.cpp +++ b/test/test_alloc.cpp @@ -156,9 +156,6 @@ TEST(Alloc_AttachFile) } } - -// FIXME: Fails on Windows -#ifndef _MSC_VER TEST(Alloc_BadFile) { GROUP_TEST_PATH(path_1); @@ -166,7 +163,7 @@ TEST(Alloc_BadFile) { File file(path_1, File::mode_Append); - file.write("foo"); + file.write(0, "foo"); } { @@ -189,8 +186,6 @@ TEST(Alloc_BadFile) CHECK_THROW(alloc.attach_file(path_1, cfg), InvalidDatabase); } } -#endif - TEST(Alloc_AttachBuffer) { @@ -211,7 +206,7 @@ TEST(Alloc_AttachBuffer) buffer_size = size_t(file.get_size()); buffer.reset(new char[buffer_size]); CHECK(bool(buffer)); - file.read(buffer.get(), buffer_size); + file.read(0, buffer.get(), buffer_size); } File::remove(path); } diff --git a/test/test_compaction.cpp b/test/test_compaction.cpp index a8f0e53fde9..d22f92ef101 100644 --- a/test/test_compaction.cpp +++ b/test/test_compaction.cpp @@ -17,6 +17,7 @@ **************************************************************************/ #include +#include #include #include "test.hpp" diff --git a/test/test_encrypted_file_mapping.cpp b/test/test_encrypted_file_mapping.cpp index 306879f918e..0cd212ed444 100644 --- a/test/test_encrypted_file_mapping.cpp +++ b/test/test_encrypted_file_mapping.cpp @@ -63,7 +63,7 @@ using namespace realm::util; using realm::FileDesc; namespace { -const uint8_t test_key[] = "1234567890123456789012345678901123456789012345678901234567890123"; +const char test_key[] = "1234567890123456789012345678901123456789012345678901234567890123"; } TEST(EncryptedFile_CryptorBasic) @@ -71,13 +71,13 @@ TEST(EncryptedFile_CryptorBasic) TEST_PATH(path); AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); const char data[4096] = "test data"; char buffer[4096]; File file(path, realm::util::File::mode_Write); - 
cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); - cryptor.read(file.get_descriptor(), 0, buffer, sizeof(buffer)); + cryptor.write(file.get_descriptor(), 0, data); + cryptor.read(file.get_descriptor(), 0, buffer); CHECK(memcmp(buffer, data, strlen(data)) == 0); } @@ -85,20 +85,18 @@ TEST(EncryptedFile_CryptorRepeatedWrites) { TEST_PATH(path); AESCryptor cryptor(test_key); - cryptor.set_file_size(16); + cryptor.set_data_size(16); const char data[4096] = "test data"; char raw_buffer_1[8192] = {0}, raw_buffer_2[8192] = {0}; File file(path, realm::util::File::mode_Write); - cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); - file.seek(0); - ssize_t actual_read_1 = file.read(raw_buffer_1, sizeof(raw_buffer_1)); + cryptor.write(file.get_descriptor(), 0, data); + ssize_t actual_read_1 = file.read(0, raw_buffer_1, sizeof(raw_buffer_1)); CHECK_EQUAL(actual_read_1, sizeof(raw_buffer_1)); - cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); - file.seek(0); - ssize_t actual_read_2 = file.read(raw_buffer_2, sizeof(raw_buffer_2)); + cryptor.write(file.get_descriptor(), 0, data); + ssize_t actual_read_2 = file.read(0, raw_buffer_2, sizeof(raw_buffer_2)); CHECK_EQUAL(actual_read_2, sizeof(raw_buffer_2)); CHECK(memcmp(raw_buffer_1, raw_buffer_2, sizeof(raw_buffer_1)) != 0); @@ -114,13 +112,13 @@ TEST(EncryptedFile_SeparateCryptors) File file(path, realm::util::File::mode_Write); { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); - cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); + cryptor.set_data_size(16); + cryptor.write(file.get_descriptor(), 0, data); } { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); - cryptor.read(file.get_descriptor(), 0, buffer, sizeof(buffer)); + cryptor.set_data_size(16); + cryptor.read(file.get_descriptor(), 0, buffer); } CHECK(memcmp(buffer, data, strlen(data)) == 0); @@ -135,265 +133,501 @@ TEST(EncryptedFile_InterruptedWrite) File file(path, realm::util::File::mode_Write); { AESCryptor 
cryptor(test_key); - cryptor.set_file_size(16); - cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); + cryptor.set_data_size(16); + cryptor.write(file.get_descriptor(), 0, data); } // Fake an interrupted write which updates the IV table but not the data char buffer[4096]; - file.seek(0); - size_t actual_pread = file.read(buffer, 64); + size_t actual_pread = file.read(0, buffer, 64); CHECK_EQUAL(actual_pread, 64); memcpy(buffer + 32, buffer, 32); buffer[5]++; // first byte of "hmac1" field in iv table - file.seek(0); - file.write(buffer, 64); + file.write(0, buffer, 64); { AESCryptor cryptor(test_key); - cryptor.set_file_size(16); - cryptor.read(file.get_descriptor(), 0, buffer, sizeof(buffer)); + cryptor.set_data_size(16); + cryptor.read(file.get_descriptor(), 0, buffer); CHECK(memcmp(buffer, data, strlen(data)) == 0); } } -TEST(EncryptedFile_LargePages) +TEST(EncryptedFile_IVRefreshing) { - TEST_PATH(path); + constexpr size_t page_size = 4096; + constexpr size_t pages_per_metadata_block = 64; + + // enough data to span two metadata blocks + constexpr size_t page_count = pages_per_metadata_block * 2; + constexpr File::SizeType data_size = page_size * page_count; + char data[page_size]; + std::iota(std::begin(data), std::end(data), 0); - char data[4096 * 4]; - for (size_t i = 0; i < sizeof(data); ++i) - data[i] = static_cast(i); + TEST_PATH(path); + File file(path, realm::util::File::mode_Write); + const FileDesc fd = file.get_descriptor(); AESCryptor cryptor(test_key); - cryptor.set_file_size(sizeof(data)); - char buffer[sizeof(data)]; + cryptor.set_data_size(data_size); + for (File::SizeType i = 0; i < data_size; i += page_size) { + cryptor.write(fd, i, data); + } + // The IVs for the pages we just wrote should obviously be up to date + for (size_t i = 0; i < page_count; ++i) { + CHECK_NOT(cryptor.refresh_iv(fd, i)); + } + // and we should see the same ones after rereading them + cryptor.invalidate_ivs(); + for (size_t i = 0; i < page_count; ++i) { + 
CHECK_NOT(cryptor.refresh_iv(fd, i)); + } - File file(path, realm::util::File::mode_Write); - cryptor.write(file.get_descriptor(), 0, data, sizeof(data)); - cryptor.read(file.get_descriptor(), 0, buffer, sizeof(buffer)); - CHECK(memcmp(buffer, data, sizeof(data)) == 0); + AESCryptor cryptor2(test_key); + cryptor2.set_data_size(data_size); + for (size_t i = 0; i < page_count; ++i) { + // Each IV should be up to date immediately after reading the page + cryptor2.read(fd, File::SizeType(i) * page_size, data); + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Nothing's changed so rereading them should report no refresh needed + cryptor2.invalidate_ivs(); + for (size_t i = 0; i < page_count; ++i) { + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Modify all pages, invalidate, verify each page needs to be refreshed + // Note that even though this isn't changing the plaintext it does update + // the ciphertext each time + for (File::SizeType i = 0; i < data_size; i += page_size) { + cryptor.write(fd, i, data); + } + cryptor2.invalidate_ivs(); + for (size_t i = 0; i < page_count; ++i) { + CHECK(cryptor2.refresh_iv(fd, i)); + // refresh_iv only returns true once per page per write + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Modify all pages, verifying that a refresh is needed after each one + for (size_t i = 0; i < page_count; ++i) { + cryptor.write(fd, File::SizeType(i) * page_size, data); + cryptor2.invalidate_ivs(); + CHECK(cryptor2.refresh_iv(fd, i)); + CHECK_NOT(cryptor2.refresh_iv(fd, i)); + } + + // Same thing but in reverse. 
This verifies that initialization of data + // before the earliest populated point is tracked correctly + for (size_t i = page_count; i > 0; --i) { + cryptor.write(fd, File::SizeType(i - 1) * page_size, data); + cryptor2.invalidate_ivs(); + CHECK(cryptor2.refresh_iv(fd, i - 1)); + CHECK_NOT(cryptor2.refresh_iv(fd, i - 1)); + } } -TEST(EncryptedFile_IVRefreshing) +TEST(EncryptedFile_NonPageAlignedMapping) { - using IVPageStates = realm::util::FlatMap; - constexpr size_t block_size = 4096; - constexpr size_t blocks_per_metadata_block = 64; - const size_t pages_per_metadata_block = block_size * blocks_per_metadata_block / page_size(); - - auto verify_page_states = [&](const IVPageStates& states, off_t data_pos, - std::vector expected_pages_refreshed) { - size_t start_page_ndx = ((data_pos / block_size) / blocks_per_metadata_block) * blocks_per_metadata_block * - block_size / page_size(); - size_t end_page_ndx = (((data_pos / block_size) + blocks_per_metadata_block) / blocks_per_metadata_block) * - blocks_per_metadata_block * block_size / page_size(); - - CHECK_EQUAL(states.size(), end_page_ndx - start_page_ndx); - for (size_t ndx = start_page_ndx; ndx < end_page_ndx; ++ndx) { - CHECK_EQUAL(states.count(ndx), 1); - bool expected_refresh = std::find(expected_pages_refreshed.begin(), expected_pages_refreshed.end(), - ndx) != expected_pages_refreshed.end(); - CHECK(states.at(ndx) == (expected_refresh ? 
IVRefreshState::RequiresRefresh : IVRefreshState::UpToDate)); + TEST_PATH(path); + { + File f(path, File::mode_Write); + f.set_encryption_key(test_util::crypt_key(true)); + f.resize(page_size() * 2); + // Since no power-of-two page size is a multiple of 11, one of these + // mappings will straddle a page + for (size_t pos = 0; pos + 10 <= page_size() * 2; pos += 11) { + File::Map map(f, pos, File::access_ReadWrite, 10); + util::encryption_read_barrier(map, 0, 10); + for (int i = 0; i < 10; ++i) + map.get_addr()[i] = char(i + 1); + util::encryption_write_barrier(map, 0, 10); } - }; + } + { + File f(path, File::mode_Read); + f.set_encryption_key(test_util::crypt_key(true)); + for (size_t pos = 0; pos + 17 <= page_size() * 2; pos += 7) { + File::Map map(f, pos, File::access_ReadOnly, 6); + util::encryption_read_barrier(map, 0, 6); + for (int i = 0; i < 6; ++i) + CHECK_EQUAL(int(map.get_addr()[i]), (pos + i + 1) % 11); + } + } +} +TEST(EncryptedFile_GapsOfNeverWrittenPages) +{ + constexpr size_t page_count = 128; TEST_PATH(path); - // enough data to span two metadata blocks - constexpr size_t data_size = block_size * blocks_per_metadata_block * 2; - const size_t num_pages = data_size / page_size(); - char data[block_size]; - for (size_t i = 0; i < sizeof(data); ++i) - data[i] = static_cast(i); - AESCryptor cryptor(test_key); - cryptor.set_file_size(off_t(data_size)); - File file(path, realm::util::File::mode_Write); - const FileDesc fd = file.get_descriptor(); + // Write to every other page. Note that on 16k systems this is actually + // writing to 4 pages and then skipping 4 pages, which achieves the same + // goal.
+ { + File f(path, File::mode_Write); + f.set_encryption_key(test_util::crypt_key(true)); + f.resize(page_size() * page_count); + for (size_t i = 0; i < page_count; i += 2) { + File::Map map(f, i * page_size(), File::access_ReadWrite, page_size()); + util::encryption_read_barrier(map, 0, page_size()); + std::fill(map.get_addr(), map.get_addr() + map.get_size(), 1); + util::encryption_write_barrier(map, 0, page_size()); + } + } - auto make_external_write_at_pos = [&](off_t data_pos) -> size_t { - const size_t begin_write_block = data_pos / block_size * block_size; - const size_t ndx_in_block = data_pos % block_size; - AESCryptor cryptor2(test_key); - cryptor2.set_file_size(off_t(data_size)); - cryptor2.read(fd, off_t(begin_write_block), data, block_size); - ++data[ndx_in_block]; - cryptor2.write(fd, off_t(begin_write_block), data, block_size); - return data_pos / page_size(); - }; - - for (size_t i = 0; i < data_size; i += block_size) { - cryptor.write(fd, off_t(i), data, block_size); + // Trying to read via a single large read barrier should fail since it + // includes never-written pages + { + File f(path, File::mode_Read); + f.set_encryption_key(test_util::crypt_key(true)); + File::Map map(f, 0, File::access_ReadOnly, page_count * page_size()); + CHECK_THROW(util::encryption_read_barrier(map, 0, map.get_size()), DecryptionFailed); } - IVPageStates states = cryptor.refresh_ivs(fd, 0, 0, num_pages); - std::vector pages_needing_refresh = {}; - for (size_t i = 0; i < pages_per_metadata_block; ++i) { - pages_needing_refresh.push_back(i); + // A single large read mapping that only has barriers for the written pages + // should work + { + File f(path, File::mode_Read); + f.set_encryption_key(test_util::crypt_key(true)); + File::Map map(f, 0, File::access_ReadOnly, page_count * page_size()); + for (size_t i = 0; i < page_count; i += 2) { + util::encryption_read_barrier(map, i * page_size(), page_size()); + for (size_t j = 0; j < page_size(); ++j) { + 
CHECK_EQUAL(int(map.get_addr()[i * page_size() + j]), 1); + } + } + + // And reading the unwritten pages should throw + for (size_t i = 1; i < page_count; i += 2) { + CHECK_THROW(util::encryption_read_barrier(map, 0, map.get_size()), DecryptionFailed); + } } - // initial call requires refreshing all pages in range - verify_page_states(states, 0, pages_needing_refresh); - states = cryptor.refresh_ivs(fd, 0, 0, num_pages); - // subsequent call does not require refreshing anything - verify_page_states(states, 0, {}); - - pages_needing_refresh = {}; - for (size_t i = 0; i < pages_per_metadata_block; ++i) { - pages_needing_refresh.push_back(i + pages_per_metadata_block); + + // Reading the whole thing via a write mapping should work, as those are + // allowed to see uninitialized data + { + File f(path, File::mode_Update); + f.set_encryption_key(test_util::crypt_key(true)); + File::Map map(f, 0, File::access_ReadWrite, page_count * page_size()); + util::encryption_read_barrier(map, 0, map.get_size()); + + for (size_t i = 0; i < page_count; ++i) { + const int expected = (i + 1) % 2; + for (size_t j = 0; j < page_size(); ++j) { + CHECK_EQUAL(int(map.get_addr()[i * page_size() + j]), expected); + } + } + util::encryption_write_barrier(map, 0, map.get_size()); } - off_t read_data_pos = off_t(pages_per_metadata_block * page_size()); - states = cryptor.refresh_ivs(fd, read_data_pos, pages_per_metadata_block, num_pages); - verify_page_states(states, read_data_pos, pages_needing_refresh); - states = cryptor.refresh_ivs(fd, read_data_pos, pages_per_metadata_block, num_pages); - verify_page_states(states, read_data_pos, {}); - - read_data_pos = off_t(data_size / 2); - size_t read_page_ndx = read_data_pos / page_size(); - states = cryptor.refresh_ivs(fd, read_data_pos, read_page_ndx, num_pages); - verify_page_states(states, read_data_pos, {}); - - read_data_pos = off_t(data_size - 1); - read_page_ndx = read_data_pos / page_size(); - states = cryptor.refresh_ivs(fd, read_data_pos, 
read_page_ndx, num_pages); - verify_page_states(states, read_data_pos, {}); - - // write at pos 0, read half way through the first page - make_external_write_at_pos(0); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {0}); - - // write at end of first page, read half way through first page - make_external_write_at_pos(off_t(page_size() - 1)); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {0}); - - // write at beginning of second page, read in first page - make_external_write_at_pos(off_t(page_size())); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {1}); - - // write at last page of first metadata block, read in first page - size_t page_needing_refresh = make_external_write_at_pos(blocks_per_metadata_block * block_size - 1); - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {page_needing_refresh}); - - // test truncation of end_page: write to first page, and last page of first metadata block, read in first page, - // but set the end page index lower than the last write - make_external_write_at_pos(0); - page_needing_refresh = make_external_write_at_pos(blocks_per_metadata_block * block_size - 1); - REALM_ASSERT(page_needing_refresh >= 1); // this test assumes page_size is < 64 * block_size - read_data_pos = off_t(0); - constexpr size_t end_page_index = 1; - states = cryptor.refresh_ivs(fd, read_data_pos, 0, end_page_index); - CHECK_EQUAL(states.size(), 1); - CHECK_EQUAL(states.count(size_t(0)), 1); - CHECK(states[0] == IVRefreshState::RequiresRefresh); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, 0, 
{page_needing_refresh}); - - // write to a block indexed to the second metadata block - page_needing_refresh = make_external_write_at_pos(blocks_per_metadata_block * block_size); - // a read anywhere in the first metadata block domain does not require refresh - read_data_pos = off_t(page_size() / 2); - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {}); - // but a read in a page controlled by the second metadata block does require a refresh - read_data_pos = off_t(blocks_per_metadata_block * block_size); - states = cryptor.refresh_ivs(fd, read_data_pos, page_needing_refresh, num_pages); - verify_page_states(states, read_data_pos, {page_needing_refresh}); - - // write to the last byte of data - page_needing_refresh = make_external_write_at_pos(data_size - 1); - // a read anywhere in the first metadata block domain does not require refresh - read_data_pos = 0; - states = cryptor.refresh_ivs(fd, read_data_pos, 0, num_pages); - verify_page_states(states, read_data_pos, {}); - // but a read in a page controlled by the second metadata block does require a refresh - read_data_pos = off_t(data_size - 1); - states = cryptor.refresh_ivs(fd, read_data_pos, page_needing_refresh, num_pages); - verify_page_states(states, read_data_pos, {page_needing_refresh}); } -static void check_attach_and_read(const char* key, const std::string& path, size_t num_entries) +TEST(EncryptedFile_MultipleWriterMappings) { - try { - auto hist = make_in_realm_history(); - DBOptions options(key); - auto sg = DB::create(*hist, path, options); - auto rt = sg->start_read(); - auto foo = rt->get_table("foo"); - auto pk_col = foo->get_primary_key_column(); - REALM_ASSERT_3(foo->size(), ==, num_entries); - REALM_ASSERT_3(foo->where().equal(pk_col, util::format("name %1", num_entries - 1).c_str()).count(), ==, 1); + const size_t count = 4096 * 64 * 2; // i.e. 
two metablocks of data + const size_t increments = 100; + TEST_PATH(path); + + { + File w(path, File::mode_Write); + w.set_encryption_key(test_util::crypt_key(true)); + w.resize(count); + File::Map map1(w, File::access_ReadWrite, count); + File::Map map2(w, File::access_ReadWrite, count); + + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i); + map1.get_addr()[i] = 1; + realm::util::encryption_write_barrier(map1, i); + } + + // Since these are multiple mappings from one File, they should see + // each other's writes without flushing in between + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i, 1); + ++map1.get_addr()[i]; + realm::util::encryption_write_barrier(map1, i); + util::encryption_read_barrier(map2, i, 1); + ++map2.get_addr()[i]; + realm::util::encryption_write_barrier(map2, i); + } } - catch (const std::exception& e) { - auto fs = File::get_size_static(path); - util::format(std::cout, "Error for num_entries %1 with page_size of %2 on file of size %3\n%4", num_entries, - page_size(), fs, e.what()); - throw; + + File reader(path, File::mode_Read); + reader.set_encryption_key(test_util::crypt_key(true)); + + File::Map read(reader, File::access_ReadOnly, count); + util::encryption_read_barrier(read, 0, count); + for (size_t i = 0; i < count; i += increments) { + if (!CHECK_EQUAL(int(read.get_addr()[i]), 3)) + return; } } -// This test changes the global page_size() and should not run with other tests. -// It checks that an encrypted Realm is portable between systems with a different page size -NONCONCURRENT_TEST(EncryptedFile_Portablility) +TEST(EncryptedFile_MultipleWriterFiles) { - const char* key = test_util::crypt_key(true); - // The idea here is to incrementally increase the allocations in the Realm - // such that the top ref written eventually crosses over the block_size and - // page_size() thresholds. This has caught faulty top_ref + size calculations. 
- std::vector test_sizes; -#if TEST_DURATION == 0 - test_sizes.resize(100); - std::iota(test_sizes.begin(), test_sizes.end(), 500); - // The allocations are not controlled, but at the time of writing this test - // 539 objects produced a file of size 16384 while 540 objects produced a file of size 20480 - // so at least one threshold is crossed here, though this may change if the allocator changes - // or if compression is implemented -#else - test_sizes.resize(5000); - std::iota(test_sizes.begin(), test_sizes.end(), 500); -#endif - - test_sizes.push_back(1); // check the lower limit - for (auto num_entries : test_sizes) { - TEST_PATH(path); - { - // create the Realm with the smallest supported page_size() of 4096 - OnlyForTestingPageSizeChange change_page_size(4096); - Group g; - TableRef foo = g.add_table_with_primary_key("foo", type_String, "name", false); - for (size_t i = 0; i < num_entries; ++i) { - foo->create_object_with_primary_key(util::format("name %1", i)); - } - g.write(path, key); - // size_t fs = File::get_size_static(path); - // util::format(std::cout, "write of %1 objects produced a file of size %2\n", num_entries, fs); + const size_t count = 4096 * 64 * 2; // i.e. 
two metablocks of data + const size_t increments = 100; + TEST_PATH(path); + + { + File w1(path, File::mode_Write); + w1.set_encryption_key(test_util::crypt_key(true)); + w1.resize(count); + File::Map map1(w1, File::access_ReadWrite, count); + + File w2(path, File::mode_Update); + w2.set_encryption_key(test_util::crypt_key(true)); + File::Map map2(w2, File::access_ReadWrite, count); + + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i); + map1.get_addr()[i] = 1; + realm::util::encryption_write_barrier(map1, i); + } + map1.flush(); + + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i, 1); + ++map1.get_addr()[i]; + realm::util::encryption_write_barrier(map1, i); + map1.flush(); + w2.get_encryption()->mark_data_as_possibly_stale(); + + util::encryption_read_barrier(map2, i, 1); + ++map2.get_addr()[i]; + realm::util::encryption_write_barrier(map2, i); + map2.flush(); + w1.get_encryption()->mark_data_as_possibly_stale(); } + } + + File reader(path, File::mode_Read); + reader.set_encryption_key(test_util::crypt_key(true)); + + File::Map read(reader, File::access_ReadOnly, count); + util::encryption_read_barrier(read, 0, count); + for (size_t i = 0; i < count; i += increments) { + if (!CHECK_EQUAL(int(read.get_addr()[i]), 3)) + return; + } +} + +TEST(EncryptedFile_MultipleReaders) +{ + const size_t count = 4096 * 64 * 2; // i.e. 
two metablocks of data + const size_t increments = 100; + TEST_PATH(path); + + File w1(path, File::mode_Write); + w1.set_encryption_key(test_util::crypt_key(true)); + w1.resize(count); + File::Map map1(w1, File::access_ReadWrite, count); + File::Map map2(w1, File::access_ReadOnly, count); + + File w2(path, File::mode_Read); + w2.set_encryption_key(test_util::crypt_key(true)); + File::Map map3(w2, File::access_ReadOnly, count); + + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i); + map1.get_addr()[i] = 1; + realm::util::encryption_write_barrier(map1, i); + } + map1.flush(); + + // Bring both readers fully up to date + util::encryption_read_barrier(map2, 0, count); + util::encryption_read_barrier(map3, 0, count); + + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i, 1); + ++map1.get_addr()[i]; + realm::util::encryption_write_barrier(map1, i); + + // map1 sees the new value because the write was performed via it + // map2 was updated in the write barrier since it's the same File + // map3 is viewing stale data but hasn't been told to refresh + CHECK_EQUAL(map1.get_addr()[i], 2); + CHECK_EQUAL(map2.get_addr()[i], 2); + CHECK_EQUAL(map3.get_addr()[i], 1); + + // Read barrier is a no-op because of no call to mark_data_as_possibly_stale() + util::encryption_read_barrier(map3, i, 1); + CHECK_EQUAL(map3.get_addr()[i], 1); + + map1.flush(true); + w2.get_encryption()->mark_data_as_possibly_stale(); + + // Still see the old value since no read barrier + CHECK_EQUAL(map3.get_addr()[i], 1); + + // Now finally brought up to date + util::encryption_read_barrier(map3, i, 1); + CHECK_EQUAL(map3.get_addr()[i], 2); + } +} + +TEST(EncryptedFile_IVsAreRereadOnlyWhenObserverIsPresent) +{ + TEST_PATH(path); + const size_t page_size = 4096; + const size_t size = page_size * 64; + File w(path, File::mode_Write); + w.set_encryption_key(test_util::crypt_key(true)); + w.resize(size); + + // Initialize all of the 
pages so iv1 is non-zero + File::Map map_w(w, File::access_ReadWrite, size); + encryption_read_barrier(map_w, 0, size); + encryption_write_barrier(map_w, 0, size); + map_w.flush(); + + File r(path, File::mode_Read); + r.set_encryption_key(test_util::crypt_key(true)); + File::Map map_r1(r, File::access_ReadOnly, size); + File::Map map_r2(r, File::access_ReadOnly, size); + File::Map map_r3(r, File::access_ReadOnly, size); + + struct : WriteObserver { + bool no_concurrent_writer_seen() override { - OnlyForTestingPageSizeChange change_page_size(8192); - check_attach_and_read(key, path, num_entries); + return true; } + } r2_observer; + map_r2.get_encrypted_mapping()->set_observer(&r2_observer); + + struct : WriteObserver { + bool no_concurrent_writer_seen() override { - OnlyForTestingPageSizeChange change_page_size(16384); - check_attach_and_read(key, path, num_entries); + return false; } + } r3_observer; + map_r3.get_encrypted_mapping()->set_observer(&r3_observer); + + // Reads the entire IV block and first page of data + encryption_read_barrier(map_r1, 0, page_size); + encryption_read_barrier(map_r2, 0, page_size); + encryption_read_barrier(map_r3, 0, page_size); + + encryption_read_barrier(map_w, page_size, size - page_size); + encryption_write_barrier(map_w, page_size, size - page_size); + map_w.flush(); + + // No observer, so it uses the cached IV/hmac + CHECK_THROW(encryption_read_barrier(map_r1, page_size, 1), DecryptionFailed); + // Observer says no concurrent writers, so it uses the cached IV/hmac + CHECK_THROW(encryption_read_barrier(map_r2, page_size, 1), DecryptionFailed); + // Observer says there are concurrent writers, so it rereads the IV after + // decryption fails the first time + encryption_read_barrier(map_r3, page_size, 1); +} + +TEST(EncryptedFile_Truncation) +{ + TEST_PATH(path); + const size_t page_size = 4096; + const size_t size = page_size * 64; + File w(path, File::mode_Write); + w.set_encryption_key(test_util::crypt_key(true)); + 
w.resize(size); + + { + // Initialize all of the pages so iv1 is non-zero + File::Map map(w, File::access_ReadWrite, size); + encryption_read_barrier(map, 0, size); + encryption_write_barrier(map, 0, size); + } + + // Truncate and then re-expand the file + w.resize(size / 2); + w.resize(size); + + { + File::Map map(w, File::access_ReadOnly, size); + // Trying to read the entire file fails because it's trying to read + // uninitialized data + CHECK_THROW(encryption_read_barrier(map, 0, size), DecryptionFailed); + // Reading just the valid part works + CHECK_NOTHROW(encryption_read_barrier(map, 0, size / 2)); + } + + { + // Write mapping can read the entire file + File::Map map(w, File::access_ReadWrite, size); + encryption_read_barrier(map, 0, size); + encryption_write_barrier(map, 0, size); + } +} + +TEST(EncryptedFile_RacingReadAndWrite) +{ + TEST_PATH(path); + static constexpr size_t page_size = 4096; + static constexpr size_t page_count = 64; + static constexpr size_t size = page_size * page_count; + + { + // Initialize the file + File w(path, File::mode_Write); + w.set_encryption_key(test_util::crypt_key(true)); + w.resize(size); + File::Map map(w, File::access_ReadWrite, size); + encryption_read_barrier(map, 0, size); + encryption_write_barrier(map, 0, size); + map.flush(); + } - // check with the native page_size (which is probably redundant with one of the above) - // and check that a write works correctly - auto history = make_in_realm_history(); - DBOptions options(key); - DBRef db = DB::create(*history, path, options); - auto wt = db->start_write(); - TableRef bar = wt->get_or_add_table_with_primary_key("bar", type_String, "pk"); - bar->create_object_with_primary_key("test"); - wt->commit(); - check_attach_and_read(key, path, num_entries); + File w(path, File::mode_Update); + // note: not setting encryption key + // Flip some bits in the encrypted file to make it invalid + for (File::SizeType pos = int(page_size); pos < w.get_size(); pos += page_size) { + 
char c; + w.read(pos, &c, 1); + c = ~c; + w.write(pos, &c, 1); + } + + struct : WriteObserver { + size_t page = 0; + size_t count = 0; + AESCryptor cryptor{test_util::crypt_key(true)}; + util::File* file; + + bool no_concurrent_writer_seen() override + { + // The first 15 read attempts we modify the page so that it + // continues trying to reread past the normal limit of 5 attempts, + // but we continue to leave the page in an invalid state + if (++count < 15) { + auto pos = (page + 1) * page_size + 1; + char c; + file->read(pos, &c, 1); + ++c; + file->write(pos, &c, 1); + return false; + } + + // Now we write valid encrypted data which will result in the + // decryption succeeding + count = 0; + char buffer[page_size] = {0}; + cryptor.write(file->get_descriptor(), page * page_size, buffer); + return false; + } + } observer; + observer.file = &w; + observer.cryptor.set_data_size(File::SizeType(size)); + + File r(path, File::mode_Read); + r.set_encryption_key(test_util::crypt_key(true)); + for (size_t i = 0; i < page_count; ++i) { + observer.page = i; + File::Map map(r, i * page_size); + map.get_encrypted_mapping()->set_observer(&observer); + util::encryption_read_barrier(map, 0); } } diff --git a/test/test_file.cpp b/test/test_file.cpp index 22ec9fd48ad..2f4fb1315e5 100644 --- a/test/test_file.cpp +++ b/test/test_file.cpp @@ -23,6 +23,7 @@ #include #include +#include #include #include @@ -113,7 +114,7 @@ TEST(File_Streambuf) { File f(path, File::mode_Read); char buffer[256]; - size_t n = f.read(buffer); + size_t n = f.read(0, buffer); std::string s_1(buffer, buffer + n); std::ostringstream out; out << "Line " << 1 << std::endl; @@ -123,33 +124,32 @@ TEST(File_Streambuf) } } - -TEST(File_Map) +TEST_TYPES(File_Map, std::true_type, std::false_type) { TEST_PATH(path); const char data[4096] = "12345678901234567890"; size_t len = strlen(data); { File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); 
f.resize(len); File::Map map(f, File::access_ReadWrite, len); - realm::util::encryption_read_barrier(map, 0, len); + util::encryption_read_barrier(map, 0, len); memcpy(map.get_addr(), data, len); realm::util::encryption_write_barrier(map, 0, len); } { File f(path, File::mode_Read); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); File::Map map(f, File::access_ReadOnly, len); - realm::util::encryption_read_barrier(map, 0, len); + util::encryption_read_barrier(map, 0, len); CHECK(memcmp(map.get_addr(), data, len) == 0); } } -TEST(File_MapMultiplePages) +TEST_TYPES(File_MapMultiplePages, std::true_type, std::false_type) { // two blocks of IV tables const size_t count = 4096 / sizeof(size_t) * 256 * 2; @@ -157,20 +157,20 @@ TEST(File_MapMultiplePages) TEST_PATH(path); { File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(count * sizeof(size_t)); File::Map map(f, File::access_ReadWrite, count * sizeof(size_t)); - realm::util::encryption_read_barrier(map, 0, count); + util::encryption_read_barrier(map, 0, count); for (size_t i = 0; i < count; ++i) map.get_addr()[i] = i; realm::util::encryption_write_barrier(map, 0, count); } { File f(path, File::mode_Read); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); File::Map map(f, File::access_ReadOnly, count * sizeof(size_t)); - realm::util::encryption_read_barrier(map, 0, count); + util::encryption_read_barrier(map, 0, count); for (size_t i = 0; i < count; ++i) { CHECK_EQUAL(map.get_addr()[i], i); if (map.get_addr()[i] != i) @@ -179,35 +179,60 @@ TEST(File_MapMultiplePages) } } -TEST(File_ReaderAndWriter) +TEST_TYPES(File_ReaderAndWriter_SingleFile, std::true_type, std::false_type) +{ + const size_t count = 4096 / sizeof(size_t) * 256 * 2; + + TEST_PATH(path); + + File file(path, File::mode_Write); + file.set_encryption_key(crypt_key(TEST_TYPE::value)); + 
file.resize(count * sizeof(size_t)); + + File::Map write(file, File::access_ReadWrite, count * sizeof(size_t)); + File::Map read(file, File::access_ReadOnly, count * sizeof(size_t)); + + for (size_t i = 0; i < count; i += 100) { + util::encryption_read_barrier(write, i, 1); + write.get_addr()[i] = i; + realm::util::encryption_write_barrier(write, i); + util::encryption_read_barrier(read, i); + if (!CHECK_EQUAL(read.get_addr()[i], i)) + return; + } +} + +TEST_TYPES(File_ReaderAndWriter_MulitpleFiles, std::true_type, std::false_type) { const size_t count = 4096 / sizeof(size_t) * 256 * 2; TEST_PATH(path); File writer(path, File::mode_Write); - writer.set_encryption_key(crypt_key()); + writer.set_encryption_key(crypt_key(TEST_TYPE::value)); writer.resize(count * sizeof(size_t)); File reader(path, File::mode_Read); - reader.set_encryption_key(crypt_key()); + reader.set_encryption_key(crypt_key(TEST_TYPE::value)); CHECK_EQUAL(writer.get_size(), reader.get_size()); File::Map write(writer, File::access_ReadWrite, count * sizeof(size_t)); File::Map read(reader, File::access_ReadOnly, count * sizeof(size_t)); for (size_t i = 0; i < count; i += 100) { - realm::util::encryption_read_barrier(write, i, 1); + util::encryption_read_barrier(write, i, 1); write.get_addr()[i] = i; realm::util::encryption_write_barrier(write, i); - realm::util::encryption_read_barrier(read, i); - CHECK_EQUAL(read.get_addr()[i], i); - if (read.get_addr()[i] != i) + write.flush(true); + if (auto encryption = reader.get_encryption()) + encryption->mark_data_as_possibly_stale(); + util::encryption_read_barrier(read, i); + if (!CHECK_EQUAL(read.get_addr()[i], i)) return; } } -TEST(File_Offset) +TEST_TYPES(File_Offset, std::true_type, std::false_type) { const size_t size = page_size(); const size_t count_per_page = size / sizeof(size_t); @@ -217,13 +242,13 @@ TEST(File_Offset) TEST_PATH(path); { File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + 
f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(page_count * size); for (size_t i = 0; i < page_count; ++i) { File::Map map(f, i * size, File::access_ReadWrite, size); for (size_t j = 0; j < count_per_page; ++j) { - realm::util::encryption_read_barrier(map, j, 1); + util::encryption_read_barrier(map, j, 1); map.get_addr()[j] = i * size + j; realm::util::encryption_write_barrier(map, j); } @@ -231,11 +256,11 @@ TEST(File_Offset) } { File f(path, File::mode_Read); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); for (size_t i = 0; i < page_count; ++i) { File::Map map(f, i * size, File::access_ReadOnly, size); for (size_t j = 0; j < count_per_page; ++j) { - realm::util::encryption_read_barrier(map, j); + util::encryption_read_barrier(map, j); CHECK_EQUAL(map.get_addr()[j], i * size + j); if (map.get_addr()[j] != i * size + j) return; @@ -244,25 +269,60 @@ TEST(File_Offset) } } +TEST_TYPES(File_MultipleWriters_SingleFile, std::true_type, std::false_type) +{ + const size_t count = 4096 / sizeof(size_t) * 256 * 2; + const size_t increments = 100; + TEST_PATH(path); + + { + File w(path, File::mode_Write); + w.set_encryption_key(crypt_key(TEST_TYPE::value)); + w.resize(count * sizeof(size_t)); + File::Map map1(w, File::access_ReadWrite, count * sizeof(size_t)); + File::Map map2(w, File::access_ReadWrite, count * sizeof(size_t)); + + // Place zeroes in selected places + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i); + map1.get_addr()[i] = 0; + realm::util::encryption_write_barrier(map1, i); + } + + for (size_t i = 0; i < count; i += increments) { + util::encryption_read_barrier(map1, i, 1); + ++map1.get_addr()[i]; + realm::util::encryption_write_barrier(map1, i); + util::encryption_read_barrier(map2, i, 1); + ++map2.get_addr()[i]; + realm::util::encryption_write_barrier(map2, i); + } + } + + File reader(path, File::mode_Read); + 
reader.set_encryption_key(crypt_key(TEST_TYPE::value)); -TEST(File_MultipleWriters) + File::Map read(reader, File::access_ReadOnly, count * sizeof(size_t)); + util::encryption_read_barrier(read, 0, count); + for (size_t i = 0; i < count; i += increments) { + if (!CHECK_EQUAL(read.get_addr()[i], 2)) + return; + } +} + +TEST_TYPES(File_MultipleWriters_MultipleFiles, std::true_type, std::false_type) { const size_t count = 4096 / sizeof(size_t) * 256 * 2; -#if defined(_WIN32) && defined(REALM_ENABLE_ENCRYPTION) - // This test runs really slow on Windows with encryption - const size_t increments = 3000; -#else const size_t increments = 100; -#endif TEST_PATH(path); { File w1(path, File::mode_Write); - w1.set_encryption_key(crypt_key()); + w1.set_encryption_key(crypt_key(TEST_TYPE::value)); w1.resize(count * sizeof(size_t)); File w2(path, File::mode_Write); - w2.set_encryption_key(crypt_key()); + w2.set_encryption_key(crypt_key(TEST_TYPE::value)); w2.resize(count * sizeof(size_t)); File::Map map1(w1, File::access_ReadWrite, count * sizeof(size_t)); @@ -270,34 +330,40 @@ TEST(File_MultipleWriters) // Place zeroes in selected places for (size_t i = 0; i < count; i += increments) { - realm::util::encryption_read_barrier(map1, i); + encryption_read_barrier(map1, i); map1.get_addr()[i] = 0; - realm::util::encryption_write_barrier(map1, i); + encryption_write_barrier(map1, i); } + map1.flush(); for (size_t i = 0; i < count; i += increments) { - realm::util::encryption_read_barrier(map1, i, 1); + util::encryption_read_barrier(map1, i, 1); ++map1.get_addr()[i]; - realm::util::encryption_write_barrier(map1, i); - realm::util::encryption_read_barrier(map2, i, 1); + encryption_write_barrier(map1, i); + map1.flush(true); + if (auto encryption = w2.get_encryption()) + encryption->mark_data_as_possibly_stale(); + + util::encryption_read_barrier(map2, i, 1); ++map2.get_addr()[i]; - realm::util::encryption_write_barrier(map2, i); + encryption_write_barrier(map2, i); + map2.flush(true); 
+ if (auto encryption = w1.get_encryption()) + encryption->mark_data_as_possibly_stale(); } } File reader(path, File::mode_Read); - reader.set_encryption_key(crypt_key()); + reader.set_encryption_key(crypt_key(TEST_TYPE::value)); File::Map read(reader, File::access_ReadOnly, count * sizeof(size_t)); - realm::util::encryption_read_barrier(read, 0, count); + util::encryption_read_barrier(read, 0, count); for (size_t i = 0; i < count; i += increments) { - CHECK_EQUAL(read.get_addr()[i], 2); - if (read.get_addr()[i] != 2) + if (!CHECK_EQUAL(read.get_addr()[i], 2)) return; } } - TEST(File_SetEncryptionKey) { TEST_PATH(path); @@ -320,28 +386,27 @@ TEST(File_ReadWrite) f.resize(100); for (char i = 0; i < 100; ++i) - f.write(&i, 1); - f.seek(0); + f.write(i, &i, 1); for (char i = 0; i < 100; ++i) { char read; - f.read(&read, 1); + f.read(i, &read, 1); CHECK_EQUAL(i, read); } } -TEST(File_Resize) +TEST_TYPES(File_Resize, std::true_type, std::false_type) { TEST_PATH(path); File f(path, File::mode_Write); - f.set_encryption_key(crypt_key()); + f.set_encryption_key(crypt_key(TEST_TYPE::value)); f.resize(page_size() * 2); CHECK_EQUAL(page_size() * 2, f.get_size()); { File::Map m(f, File::access_ReadWrite, page_size() * 2); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i, 1); + util::encryption_read_barrier(m, i, 1); m.get_addr()[i] = static_cast(i); realm::util::encryption_write_barrier(m, i); } @@ -352,7 +417,7 @@ TEST(File_Resize) // encrypted data there, so flush and write a second time m.sync(); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i, 1); + util::encryption_read_barrier(m, i, 1); m.get_addr()[i] = static_cast(i); realm::util::encryption_write_barrier(m, i); } @@ -363,7 +428,7 @@ TEST(File_Resize) { File::Map m(f, File::access_ReadOnly, page_size()); for (unsigned int i = 0; i < page_size(); ++i) { - realm::util::encryption_read_barrier(m, i); + 
util::encryption_read_barrier(m, i); CHECK_EQUAL(static_cast(i), m.get_addr()[i]); if (static_cast(i) != m.get_addr()[i]) return; @@ -375,7 +440,7 @@ TEST(File_Resize) { File::Map m(f, File::access_ReadWrite, page_size() * 2); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i, 1); + util::encryption_read_barrier(m, i, 1); m.get_addr()[i] = static_cast(i); realm::util::encryption_write_barrier(m, i); } @@ -383,7 +448,7 @@ TEST(File_Resize) { File::Map m(f, File::access_ReadOnly, page_size() * 2); for (unsigned int i = 0; i < page_size() * 2; ++i) { - realm::util::encryption_read_barrier(m, i); + util::encryption_read_barrier(m, i); CHECK_EQUAL(static_cast(i), m.get_addr()[i]); if (static_cast(i) != m.get_addr()[i]) return; @@ -431,81 +496,30 @@ TEST(File_Move) CHECK_NOT(file_2.is_attached()); } -#if 0 -TEST(File_PreallocResizing) -{ - TEST_PATH(path); - File file(path, File::mode_Write); - CHECK(file.is_attached()); - // we cannot test this with encryption...prealloc always allocates a full page - file.prealloc(0); // this is allowed - CHECK_EQUAL(file.get_size(), 0); - file.prealloc(100); - CHECK_EQUAL(file.get_size(), 100); - file.prealloc(50); - CHECK_EQUAL(file.get_size(), 100); // prealloc does not reduce size - - // To expose the preallocation bug, we need to iterate over a large numbers, less than 4096. - // If the bug is present, we will allocate additional space to the file on every call, but if it is - // not present, the OS will preallocate 4096 only on the first call. 
- constexpr size_t init_size = 2048; - constexpr size_t dest_size = 3000; - for (size_t prealloc_space = init_size; prealloc_space <= dest_size; ++prealloc_space) { - file.prealloc(prealloc_space); - CHECK_EQUAL(file.get_size(), prealloc_space); - } - -#if REALM_PLATFORM_APPLE - int fd = ::open(path.c_str(), O_RDONLY); - CHECK(fd >= 0); - struct stat statbuf; - CHECK(fstat(fd, &statbuf) == 0); - size_t allocated_size = statbuf.st_blocks; - CHECK_EQUAL(statbuf.st_size, dest_size); - CHECK(!int_multiply_with_overflow_detect(allocated_size, S_BLKSIZE)); - - // When performing prealloc, the OS has the option to preallocate more than the requeted space - // but we need to check that the preallocated space is within a reasonable bound. - // If space is being incorrectly preallocated (growing on each call) then we will have more than 3000KB - // of preallocated space, but if it is being allocated correctly (only when we need to expand) then we'll have - // a multiple of the optimal file system I/O operation (`stat -f %k .`) which is 4096 on HSF+. - // To give flexibility for file system prealloc implementations we check that the preallocated space is within - // at least 16 times the nominal requested size. - CHECK_LESS(allocated_size, 4096 * 16); - - CHECK(::close(fd) == 0); -#endif -} -#endif - TEST(File_PreallocResizingAPFSBug) { TEST_PATH(path); File file(path, File::mode_Write); CHECK(file.is_attached()); - file.write("aaaaaaaaaaaaaaaaaaaa"); // 20 a's + file.write(0, "aaaaaaaaaaaaaaaaaaaa"); // 20 a's // calling prealloc on a newly created file would sometimes fail on APFS with EINVAL via fcntl(F_PREALLOCATE) // this may not be the only way to trigger the error, but it does seem to be timing dependant. file.prealloc(100); CHECK_EQUAL(file.get_size(), 100); - // let's write past the first prealloc block (@ 4096) and verify it reads correctly too. 
- file.write("aaaaa"); // this will change the file size, but likely won't preallocate more space since the first call to prealloc // will probably have allocated a whole 4096 block. file.prealloc(200); CHECK_EQUAL(file.get_size(), 200); - file.write("aa"); + file.write(22, "aa"); file.prealloc(5020); // expands to another 4096 block constexpr size_t insert_pos = 5000; const char* insert_str = "hello"; - file.seek(insert_pos); - file.write(insert_str); - file.seek(insert_pos); + file.write(insert_pos, insert_str); CHECK_EQUAL(file.get_size(), 5020); constexpr size_t input_size = 6; char input[input_size]; - file.read(input, input_size); + file.read(insert_pos, input, input_size); CHECK_EQUAL(strncmp(input, insert_str, input_size), 0); } @@ -531,84 +545,6 @@ TEST(File_parent_dir) } } -TEST(File_GetUniqueID) -{ - TEST_PATH(path_1); - TEST_PATH(path_2); - TEST_PATH(path_3); - - File file1_1; - File file1_2; - File file2_1; - file1_1.open(path_1, File::mode_Write); - file1_2.open(path_1, File::mode_Read); - file2_1.open(path_2, File::mode_Write); - - // exFAT does not allocate inode numbers until the file is first non-empty - file1_1.resize(1); - file2_1.resize(1); - - File::UniqueID uid1_1 = file1_1.get_unique_id(); - File::UniqueID uid1_2 = file1_2.get_unique_id(); - File::UniqueID uid2_1 = file2_1.get_unique_id(); - std::optional uid2_2; - CHECK(uid2_2 = File::get_unique_id(path_2)); - - CHECK(uid1_1 == uid1_2); - CHECK(uid2_1 == *uid2_2); - CHECK(uid1_1 != uid2_1); - - // Path doesn't exist - CHECK_NOT(File::get_unique_id(path_3)); - - // Test operator< - File::UniqueID uid4_1{0, 5}; - File::UniqueID uid4_2{1, 42}; - CHECK(uid4_1 < uid4_2); - CHECK_NOT(uid4_2 < uid4_1); - - uid4_1 = {0, 1}; - uid4_2 = {0, 2}; - CHECK(uid4_1 < uid4_2); - CHECK_NOT(uid4_2 < uid4_1); - - uid4_1 = uid4_2; - CHECK_NOT(uid4_1 < uid4_2); - CHECK_NOT(uid4_2 < uid4_1); - - file1_1.resize(0); - file2_1.resize(0); - file2_1.resize(1); - file1_1.resize(1); - bool running_on_buggy_exfat = 
test_util::test_dir_is_exfat(); -#if TARGET_OS_MAC - if (__builtin_available(macOS 14, *)) { - running_on_buggy_exfat = false; - } -#endif - - if (!running_on_buggy_exfat) { - CHECK(uid1_1 == file1_1.get_unique_id()); - CHECK(uid2_1 == file2_1.get_unique_id()); - } - else { - std::string message = "The unique id of this Realm file has changed unexpectedly, this could be due to " - "modifications by an external process"; - std::string expected_1 = util::format("%1 '%2'", message, file1_1.get_path()); - std::string expected_2 = util::format("%1 '%2'", message, file2_1.get_path()); - // fat32/exfat could reuse or reassign uid after truncate - // there is not much to guarantee about the values of uids - // Our File class should detect this situation and throw an error. - // Once a Realm has been opened it should never be truncated to 0 so this is not expected - // to ever be thrown in normal Realm usage. - // One example of where this has caused problems is that the encryption layer stores - // encrypted mappings by a file's unique id. If the ids are not actually unique, then - // writes from one Realm may get placed into another Realm's mapping. 
- CHECK_THROW_EX(file1_1.get_unique_id(), FileAccessError, e.what() == expected_1); - CHECK_THROW_EX(file2_1.get_unique_id(), FileAccessError, e.what() == expected_2); - } -} - TEST(File_Temp) { auto tmp_file_name = make_temp_file("foo"); diff --git a/test/test_group.cpp b/test/test_group.cpp index 54cd141485b..6eb7a6fc10b 100644 --- a/test/test_group.cpp +++ b/test/test_group.cpp @@ -184,7 +184,7 @@ TEST(Group_BadFile) { File file(path_1, File::mode_Append); - file.write("foo"); + file.write(0, "foo"); } { @@ -214,7 +214,7 @@ TEST(Group_OpenBuffer) buffer_size = size_t(file.get_size()); buffer.reset(new char[buffer_size]); CHECK(bool(buffer)); - file.read(buffer.get(), buffer_size); + file.read(0, buffer.get(), buffer_size); } } diff --git a/test/test_json.cpp b/test/test_json.cpp index 92f28ae5039..39f61d2a981 100644 --- a/test/test_json.cpp +++ b/test/test_json.cpp @@ -201,7 +201,7 @@ bool json_test(std::string json, std::string expected_file, bool generate) std::string path = file_name + "bad_" + expected_file + ".json"; std::string pathOld = "bad_" + file_name; File out(path, File::mode_Write); - out.write(json); + out.write(0, json); std::cerr << "\n error result in '" << std::string(path) << "'\n"; return false; } diff --git a/test/test_shared.cpp b/test/test_shared.cpp index 78ede3b4a0c..ce6d6f0bca2 100644 --- a/test/test_shared.cpp +++ b/test/test_shared.cpp @@ -720,7 +720,7 @@ TEST(Shared_InitialMem_StaleFile) // delete it { File f(path, File::mode_Write); - f.write("text"); + f.write(0, "text"); } CHECK(File::exists(path)); CHECK(File::exists(path.get_lock_path())); @@ -2267,9 +2267,8 @@ TEST(Shared_EncryptionPageReadFailure) // make a corruption in the first data page util::File f(path, File::Mode::mode_Update); CHECK_GREATER(f.get_size(), 12288); // 4k iv page, then at least 2 pages - f.seek(5000); // somewhere on the first data page constexpr std::string_view data = "an external corruption in the encrypted page"; - f.write(data.data(), data.size()); + 
f.write(5000, data.data(), data.size()); // somewhere on the first data page f.sync(); f.close(); } @@ -3145,10 +3144,10 @@ TEST(Shared_LockFileOfWrongSizeThrows) // On Windows, we implement a shared lock on a file by locking the first byte of the file. Since // you cannot write to a locked region using WriteFile(), we use memory mapping which works fine, and // which is also the same method used by the .lock file initialization in SharedGroup::do_open() - char* mem = static_cast(f.map(realm::util::File::access_ReadWrite, 1)); + File::Map mem(f, realm::util::File::access_ReadWrite, 1); // set init_complete flag to 1 and sync - mem[0] = 1; + mem.get_addr()[0] = 1; f.sync(); CHECK_EQUAL(f.get_size(), wrong_size); @@ -3200,9 +3199,8 @@ TEST(Shared_LockFileOfWrongVersionThrows) File::UnlockGuard ug(f); CHECK(f.is_attached()); - f.seek(6); char bad_version = 0; - f.write(&bad_version, 1); + f.write(6, &bad_version, 1); f.sync(); mutex.lock(); @@ -3251,8 +3249,7 @@ TEST(Shared_LockFileOfWrongMutexSizeThrows) CHECK(f.is_attached()); char bad_mutex_size = sizeof(InterprocessMutex::SharedPart) + 1; - f.seek(1); - f.write(&bad_mutex_size, 1); + f.write(1, &bad_mutex_size, 1); f.sync(); mutex.lock(); @@ -3302,8 +3299,7 @@ TEST(Shared_LockFileOfWrongCondvarSizeThrows) CHECK(f.is_attached()); char bad_condvar_size = sizeof(InterprocessCondVar::SharedPart) + 1; - f.seek(2); - f.write(&bad_condvar_size, 1); + f.write(2, &bad_condvar_size, 1); f.sync(); mutex.lock(); @@ -4490,8 +4486,7 @@ TEST(Shared_ClearOnError_ResetInvalidFile) { // Overwrite the first byte of the mnemonic so that this isn't a valid file util::File file(path, File::mode_Update); - file.seek(8); - file.write("\0", 1); + file.write(8, "\0", 1); } { diff --git a/test/test_transactions.cpp b/test/test_transactions.cpp index d45458a4508..fbbdd314379 100644 --- a/test/test_transactions.cpp +++ b/test/test_transactions.cpp @@ -581,182 +581,4 @@ TEST(LangBindHelper_RollbackStringEnumInsert) 
CHECK(t->is_enumerated(col)); } -#if 0 -// The following code is a longer running test, so disabled when for ordinary testing - -void growth_phase(SharedGroup& sg_w) -{ - std::cout << "Growing..." << std::endl; - for (int j = 0; j < 100; ++j) { - //std::cout << "growth phase " << j << std::endl; - WriteTransaction wt(sg_w); - Group& g = wt.get_group(); - TableRef t = g.get_table("spoink"); - for (int k = 0; k < 50000; ++k) { - auto row = t->add_empty_row(); - t->set_string(0, row, "yooodle-de-do-glabtyligok-pluut"); - } - //std::cout << " - commit" << std::endl; - wt.commit(); - } -} - -void query_phase(SharedGroup& sg_w) -{ - std::cout << "Querying..." << std::endl; - for (int j = 0; j < 1; ++j) { - //std::cout << "growth phase " << j << std::endl; - ReadTransaction wt(sg_w); - const Group& g = wt.get_group(); - ConstTableRef t = g.get_table("spoink"); - TableView tv = t->where().equal(0,"gylle").find_all(); - } -} - -void partial_read_phase(SharedGroup& sg_w) -{ - std::cout << "Reading..." << std::endl; - for (int j = 0; j < 100; ++j) { - //std::cout << "growth phase " << j << std::endl; - ReadTransaction wt(sg_w); - const Group& g = wt.get_group(); - ConstTableRef t = g.get_table("spoink"); - int max = t->size(); - for (int z = 0; z < max/100; ++z) { - t->get_string(0,z); - } - } -} - -void modification_phase(SharedGroup& sg_w) -{ - std::cout << "Modifying..." << std::endl; - int row = 0; - for (int j = 0; j < 100; ++j) { - //std::cout << "growth phase " << j << std::endl; - WriteTransaction wt(sg_w); - Group& g = wt.get_group(); - TableRef t = g.get_table("spoink"); - int max = t->size(); - for (int k = 0; k < 100000; ++k) { - if (row == max) row = 0; - std::string s("yooodle-glabtyligok-plut-fnytliandomcrackplaf!"); - s = s + to_string(j); - t->set_string(0, row, s); - ++row; - } - //std::cout << " - commit" << std::endl; - wt.commit(); - } -} - -void preparations(SharedGroup& sg_w) -{ - std::cout << "Setup...." 
<< std::endl; - { - WriteTransaction wt(sg_w); - Group& g = wt.get_group(); - TableRef t = g.get_table("spoink"); - if (t.get() == nullptr) { - t = g.add_table("spoink"); - t->add_column(type_String,"spoink-column"); - } - wt.commit(); - } -} - -// illustration of possible governor function which takes total system load into account -class ExampleGovernor : public util::PageReclaimGovernor { -public: - util::UniqueFunction current_target_getter(size_t load) override - { - return std::bind(file_control_governor, load); - } - void report_target_result(int64_t) override - { - } - -private: - static size_t system_memory_governor(size_t load) { - try { - auto file = fopen("/proc/meminfo","r"); - if (file == nullptr) - return 0; - size_t total, free; - int r = fscanf(file,"MemTotal: %zu kB MemFree: %zu kB", &total, &free); - if (r != 2) - return 0; - fclose(file); - size_t target; - /* - if (free < total * 0.25) - target = size_t(load * 0.9); - else if (free < total * 0.3) - target = load; - else - target = size_t(load * 2.1); - if (target > total * 768) - target = total * 768; - */ - target = total * 256; - std::cout << "total: " << total << " free: " << free - << " load: " << load << " target: " << target << " \r"; - return target; - } catch (...) { - return 0; - } - } - - static size_t file_control_governor(size_t load) { - try { - auto file = fopen("governor.txt", "r"); - if (file == nullptr) - return system_memory_governor(load); - size_t target; - int r = fscanf(file, "%zu", &target); - if (r != 1) - return system_memory_governor(load); - fclose(file); - std::cout << "Encryption: active data = " << load << " set target: " << target << " \r"; - return target; - } catch (...) 
{ - return system_memory_governor(load); - } - } -}; - -ExampleGovernor example_governor; - -ONLY(LangBindHelper_EncryptionGiga) -{ - //realm::util::set_page_reclaim_governor(&example_governor); - std::string path1 = "dont_try_this_at_home1.realm"; - std::unique_ptr hist_w1(make_in_realm_history()); - - std::cout << "Opening..." << path1 << std::endl; - SharedGroup sg_w1(*hist_w1, path1, SharedGroupOptions(crypt_key())); - preparations(sg_w1); - - std::string path2 = "dont_try_this_at_home2.realm"; - std::unique_ptr hist_w2(make_in_realm_history()); - - std::cout << "Opening..." << path2 << std::endl; - SharedGroup sg_w2(*hist_w2, path2, SharedGroupOptions(crypt_key())); - preparations(sg_w2); - for (int r = 0; r < 4; ++r) { - growth_phase(sg_w1); - growth_phase(sg_w2); - modification_phase(sg_w1); - modification_phase(sg_w2); - partial_read_phase(sg_w1); - partial_read_phase(sg_w2); - query_phase(sg_w1); - query_phase(sg_w2); - std::cout << "Sleeping.." << std::endl; - millisleep(10000); - } - millisleep(100000); -} -#endif - #endif // TEST_TRANSACTIONS diff --git a/test/test_transform.cpp b/test/test_transform.cpp index e413dcc8bd5..9cb07fa8fdc 100644 --- a/test/test_transform.cpp +++ b/test/test_transform.cpp @@ -1,24 +1,3 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - #include "test.hpp" #include "testsettings.hpp" #include "util/quote.hpp" @@ -28,6 +7,28 @@ #include "util/compare_groups.hpp" #include "util/dump_changesets.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + extern unsigned int unit_test_random_seed; namespace { diff --git a/test/test_upgrade_database.cpp b/test/test_upgrade_database.cpp index da76d0542bf..ae95d1a02da 100644 --- 
a/test/test_upgrade_database.cpp +++ b/test/test_upgrade_database.cpp @@ -143,8 +143,8 @@ static void compare_files(test_util::unit_test::TestContext& test_context, const auto old_buffer = std::make_unique(old_size); auto new_buffer = std::make_unique(old_size); - old_file.read(old_buffer.get(), old_size); - new_file.read(new_buffer.get(), old_size); + old_file.read(0, old_buffer.get(), old_size); + new_file.read(0, new_buffer.get(), old_size); CHECK_NOT(memcmp(old_buffer.get(), new_buffer.get(), old_size)); } diff --git a/test/test_util_logger.cpp b/test/test_util_logger.cpp index fc8e5c13cc4..5443661b93c 100644 --- a/test/test_util_logger.cpp +++ b/test/test_util_logger.cpp @@ -280,7 +280,7 @@ TEST(Util_Logger_File_1) std::unique_ptr buffer(new char[size]); util::File file(path); if (CHECK_EQUAL(size, file.get_size())) { - file.read(buffer.get(), size); + file.read(0, buffer.get(), size); CHECK(str == std::string(buffer.get(), size)); } } @@ -304,7 +304,7 @@ TEST(Util_Logger_File_2) std::unique_ptr buffer(new char[size]); util::File file(path); if (CHECK_EQUAL(size, file.get_size())) { - file.read(buffer.get(), size); + file.read(0, buffer.get(), size); CHECK(str == std::string(buffer.get(), size)); } } diff --git a/test/tsan.suppress b/test/tsan.suppress index 987a4364332..dd1b4c3e1e0 100644 --- a/test/tsan.suppress +++ b/test/tsan.suppress @@ -1,14 +1,5 @@ # ThreadSanitizer suppressions file for realm-core -# `AESCryptor::read()` and`copy_up_to_date_page()` copy entire pages. -# They may overwrite something which -# is being read concurrently. The reason it is benign, is that whenever there is a -# race, it overwrites with the same value as is already there, so the reader sees -# the correct value. This is all by design. - -race:realm::util::AESCryptor::read -race:realm::util::EncryptedFileMapping::copy_up_to_date_page - # Avoid a false positive instance of lock-order-inversion. 
# SyncManager::m_sessions_mutex and SyncSession::m_state_mutex are locked # in this order when a SyncSession is created, and in reverse order when diff --git a/test/util/spawned_process.cpp b/test/util/spawned_process.cpp index a40421fc351..95be3c7b631 100644 --- a/test/util/spawned_process.cpp +++ b/test/util/spawned_process.cpp @@ -19,6 +19,7 @@ #include "spawned_process.hpp" #include +#include #include #include "test_path.hpp" diff --git a/test/util/test_path.hpp b/test/util/test_path.hpp index b4df71f3f77..4c398fcae64 100644 --- a/test/util/test_path.hpp +++ b/test/util/test_path.hpp @@ -98,11 +98,15 @@ class TestPathGuard { public: TestPathGuard(const std::string& path); ~TestPathGuard() noexcept; - operator std::string() const + operator const std::string&() const noexcept { return m_path; } - operator StringData() const + operator StringData() const noexcept + { + return m_path; + } + operator std::string_view() const noexcept { return m_path; }