Skip to content

Commit

Permalink
[WIP] Add an alternative method for reading files
Browse files Browse the repository at this point in the history
Using only mmap seems to be very slow with some NAS devices
  • Loading branch information
JeromeMartinez committed Oct 15, 2024
1 parent fce3a36 commit 7b285eb
Show file tree
Hide file tree
Showing 8 changed files with 209 additions and 14 deletions.
41 changes: 41 additions & 0 deletions Source/CLI/Global.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,33 @@ int global::SetHash(bool Value)
return 0;
}

//---------------------------------------------------------------------------
int global::SetIo(const char* Value)
{
if (strcmp(Value, "mmap") == 0)
{
OpenStyle = filemap::style::mmap;
return 0;
}
if (strcmp(Value, "fstream") == 0)
{
OpenStyle = filemap::style::fstream;
return 0;
}
if (strcmp(Value, "fopen") == 0)
{
OpenStyle = filemap::style::fopen;
return 0;
}
if (strcmp(Value, "open") == 0)
{
OpenStyle = filemap::style::open;
return 0;
}
cerr << "Error: unknown io value '" << Value << "'." << endl;
return 1;
}

//---------------------------------------------------------------------------
int global::SetAll(bool Value)
{
Expand Down Expand Up @@ -432,6 +459,7 @@ int global::ManageCommandLine(const char* argv[], int argc)
IgnoreLicenseKey = !License.IsSupported_License();
SubLicenseId = 0;
SubLicenseDur = 1;
OpenStyle = {}; // (filemap::style)-1;
ShowLicenseKey = false;
StoreLicenseKey = false;
DisplayCommand = false;
Expand Down Expand Up @@ -748,6 +776,14 @@ int global::ManageCommandLine(const char* argv[], int argc)
if (auto Value = SetAcceptFiles())
return Value;
}
else if (strcmp(argv[i], "--io") == 0)
{
if (i + 1 == argc)
return Error_Missing(argv[i]);
int Value = SetIo(argv[++i]);
if (Value)
return Value;
}
else if (!strcmp(argv[i], "-framerate"))
{
if (OptionsForOtherFiles)
Expand Down Expand Up @@ -831,6 +867,11 @@ int global::ManageCommandLine(const char* argv[], int argc)
}
if (License.ShowLicense(ShowLicenseKey, SubLicenseId, SubLicenseDur))
return 1;
if (OpenStyle == (filemap::style)-1)
{
cerr << "\nThis is a test version, please use another version if you don't know which option to test\n" << endl;
return 1;
}
if (Inputs.empty() && (ShowLicenseKey || SubLicenseId))
return 0;

Expand Down
2 changes: 2 additions & 0 deletions Source/CLI/Global.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class global
string LicenseKey;
uint64_t SubLicenseId;
uint64_t SubLicenseDur;
filemap::style OpenStyle;
bool IgnoreLicenseKey;
bool ShowLicenseKey;
bool StoreLicenseKey;
Expand Down Expand Up @@ -100,6 +101,7 @@ class global
int SetFrameMd5An(bool Value);
int SetFrameMd5FileName(const char* FileName);
int SetHash(bool Value);
int SetIo(const char* Value);
int SetAll(bool Value);

private:
Expand Down
11 changes: 8 additions & 3 deletions Source/CLI/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -481,7 +481,7 @@ int ParseFile_Uncompressed(parse_info& ParseInfo, size_t Files_Pos)
}

//---------------------------------------------------------------------------
int ParseFile_Compressed(parse_info& ParseInfo)
int ParseFile_Compressed(parse_info& ParseInfo, const string* FileName)
{
// Init
string OutputDirectoryName;
Expand Down Expand Up @@ -522,6 +522,11 @@ int ParseFile_Compressed(parse_info& ParseInfo)
matroska* M = new matroska(OutputDirectoryName, &Global.Mode, Ask_Callback, Thread_Pool, &Global.Errors);
M->Quiet = Global.Quiet;
M->NoOutputCheck = NoOutputCheck;
if (RAWcooked.OutputFileName.empty() && FileName)
{
RAWcooked.OutputFileName = *FileName;
M->OpenStyle = Global.OpenStyle;
}
if (ParseInfo.ParseFile_Input(*M))
{
ReturnValue = 1;
Expand Down Expand Up @@ -591,7 +596,7 @@ int ParseFile(size_t Files_Pos)
return 1;

// Compressed content
if (int Value = ParseFile_Compressed(ParseInfo))
if (int Value = ParseFile_Compressed(ParseInfo, ParseInfo.Name))
return Value;
if (ParseInfo.IsDetected)
return 0;
Expand Down Expand Up @@ -772,7 +777,7 @@ int main(int argc, const char* argv[])
// Parse (check mode)
Global.Actions.set(Action_QuickCheckAfterEncode, !Global.Actions[Action_Check]);
Global.Actions.set(Action_Decode, false); // Override config
Value = ParseFile_Compressed(ParseInfo);
Value = ParseFile_Compressed(ParseInfo, ParseInfo.Name);
if (!Value && !ParseInfo.IsDetected)
{
cout << '\n' << "Error: " << Global.OutputFileName << endl;
Expand Down
25 changes: 21 additions & 4 deletions Source/Lib/Compressed/Matroska/Matroska.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,13 +359,16 @@ void matroska::ParseBuffer()
// Check if we can indicate the system that we'll not need anymore memory below this value, without indicating it too much
if (Buffer_Offset > Buffer_Offset_LowerLimit + 1024 * 1024 && Buffer_Offset < Buffer.Size()) // TODO: when multi-threaded frame decoding is implemented, we need to check that all thread don't need anymore memory below this value
{
FileMap->Remap();
FileMap->Remap(Buffer_Offset, Buffer_Offset + 256 * 1024 * 1024);
Buffer = *FileMap;
if (OpenStyle == filemap::style::mmap)
{
if (ReversibilityData)
ReversibilityData->SetBaseData(Buffer.Data());
for (const auto& TrackInfo_Current : TrackInfo)
if (TrackInfo_Current && TrackInfo_Current->ReversibilityData)
TrackInfo_Current->ReversibilityData->SetBaseData(Buffer.Data());
}
Buffer_Offset_LowerLimit = Buffer_Offset;
}

Expand All @@ -376,13 +379,16 @@ void matroska::ParseBuffer()
Buffer_Offset = Cluster_Offset;
Cluster_Level = (size_t)-1;

FileMap->Remap();
FileMap->Remap(Buffer_Offset, 256 * 1024 * 1024);
Buffer = *FileMap;
if (OpenStyle == filemap::style::mmap)
{
if (ReversibilityData)
ReversibilityData->SetBaseData(Buffer.Data());
for (const auto& TrackInfo_Current : TrackInfo)
if (TrackInfo_Current && TrackInfo_Current->ReversibilityData)
TrackInfo_Current->ReversibilityData->SetBaseData(Buffer.Data());
}
Buffer_Offset_LowerLimit = Buffer_Offset;
}
}
Expand Down Expand Up @@ -799,6 +805,11 @@ void matroska::Segment_Attachments_AttachedFile_FileData_RawCookedTrack_LibraryV
//---------------------------------------------------------------------------
void matroska::Segment_Cluster()
{
IsList = true;

if (FileMap2)
return;

if (RAWcooked_LibraryName.empty())
{
memcpy(Cluster_Levels, Levels, sizeof(Levels));
Expand All @@ -808,8 +819,6 @@ void matroska::Segment_Cluster()
return;
}

IsList = true;

// Check if Hashes check is useful
if (Hashes_FromRAWcooked)
{
Expand Down Expand Up @@ -850,6 +859,14 @@ void matroska::Segment_Cluster()
Errors->Error(IO_FileChecker, error::type::Undecodable, (error::generic::code)filechecker_issue::undecodable::Format_Undetected, string());
if (ReversibilityData && !FrameWriter_Template->Compound)
InitOutput_Find();

FileMap2 = FileMap;
if (OpenStyle != filemap::style::mmap)
{
FileMap = new filemap;
FileMap->Open_ReadMode(*this->FileName, OpenStyle, 0, 256 * 1024 * 1024);
Buffer = *FileMap;
}
}

//---------------------------------------------------------------------------
Expand Down
123 changes: 121 additions & 2 deletions Source/Lib/Utils/FileIO/FileIO.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "Lib/Utils/FileIO/FileIO.h"
#include <iostream>
#include <sstream>
#include <fstream>
#if defined(_WIN32) || defined(_WINDOWS)
#include "windows.h"
#include <io.h> // File existence
Expand All @@ -29,10 +30,82 @@
//---------------------------------------------------------------------------

//---------------------------------------------------------------------------
int filemap::Open_ReadMode(const char* FileName)
// State kept by filemap when the file is read through a buffer instead of being memory-mapped.
struct private_buffered
{
    // Opaque handle on the opened file: the code using this struct stores there
    // an ifstream*, a FILE* or a Windows HANDLE depending on the selected style.
    // Initialized to nullptr so that a freshly created instance never carries
    // an indeterminate pointer (the other members already had initializers).
    void* F = nullptr;
    // Offset added to the view base pointer in order to get back to the start
    // of the allocated buffer.
    size_t Data_Shift = 0;
    // Size in bytes of the allocated read buffer.
    size_t MaxSize = 0;
};

//---------------------------------------------------------------------------
int filemap::Open_ReadMode(const char* FileName, style NewStyle, size_t Begin, size_t End)
{
Close();

if (NewStyle != style::mmap)
{
Style = NewStyle;
private_buffered* P = new private_buffered;
P->MaxSize = End - Begin;
size_t FileSize;

switch (Style)
{
default: // case style::fstream:
{
auto F = new ifstream(FileName, ios::binary);
F->seekg(0, F->end);
FileSize = F->tellg();
F->seekg(Begin, F->beg);
P->F = F;
break;
}
case style::fopen:
{
auto F = fopen(FileName, "rb");
fseek(F, 0, SEEK_END);
FileSize = ftell(F);
fseek(F, (long)Begin, SEEK_SET);
P->F = F;
break;
}
case style::open:
{
#if defined(_WIN32) || defined(_WINDOWS)
DWORD FileSizeHigh;
auto NewFile = CreateFileA(FileName, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
auto FileSizeLow = GetFileSize(NewFile, &FileSizeHigh);
if ((FileSizeLow != INVALID_FILE_SIZE || GetLastError() == NO_ERROR) // If no error (special case with 32-bit max value)
&& (!FileSizeHigh || sizeof(size_t) >= 8)) // Mapping 4+ GiB files is not supported in 32-bit mode
{
FileSize = ((size_t)FileSizeHigh) << 32 | FileSizeLow;
}
else
return 1;
if (Begin)
{
LARGE_INTEGER GoTo;
GoTo.QuadPart = Begin;
if (!SetFilePointerEx(NewFile, GoTo, nullptr, 0))
return 1;
P->Data_Shift = Begin;
}
P->F = NewFile;
#else //defined(_WIN32) || defined(_WINDOWS)
return 1;
#endif //defined(_WIN32) || defined(_WINDOWS)
break;
}
}

auto Buffer = new uint8_t[P->MaxSize];
P->Data_Shift -= P->MaxSize;
AssignBase(Buffer - P->Data_Shift, FileSize);
Private2 = (decltype(Private2))P;

return Remap(Begin, End);
}

size_t NewSize;
#if defined(_WIN32) || defined(_WINDOWS)
auto NewFile = CreateFileA(FileName, GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, 0);
Expand Down Expand Up @@ -99,12 +172,58 @@ inline int munmap_const(const void* addr, size_t length)
#pragma GCC diagnostic pop
#endif
#endif
int filemap::Remap()
int filemap::Remap(size_t Begin, size_t End)
{
// Special case for 0-byte files
if (Empty())
return 0;

if (Style != style::mmap)
{
auto P = (private_buffered*)Private2;
auto Buffer = Data() + P->Data_Shift;
auto Buffer_MaxSize = P->MaxSize;
Begin -= P->Data_Shift;
if (!End)
End = Size();
End -= P->Data_Shift;
auto Buffer_Middle = Buffer + Begin;
auto Buffer_Middle_Size = Buffer_MaxSize - Begin;
memmove((void*)Buffer, (void*)Buffer_Middle, Buffer_Middle_Size);
P->Data_Shift += Begin;
AssignKeepSizeBase(Buffer - P->Data_Shift);
Buffer += Buffer_Middle_Size;
Buffer_MaxSize -= Buffer_Middle_Size;

switch (Style)
{
default: // case style::fstream:
{
auto F = (ifstream*)P->F;
F->read((char*)Buffer, Buffer_MaxSize);
break;
}
case style::fopen:
{
auto F = (FILE*)P->F;
if (fread((char*)Buffer, Buffer_MaxSize, 1, F) != 1)
return 1;
break;
}
case style::open:
{
#if defined(_WIN32) || defined(_WINDOWS)
ReadFile(P->F, (LPVOID)Buffer, (DWORD)Buffer_MaxSize, nullptr, 0);
#else //defined(_WIN32) || defined(_WINDOWS)
return 1;
#endif //defined(_WIN32) || defined(_WINDOWS)
break;
}
}

return 0;
}

// Close previous map
if (Data())
{
Expand Down
16 changes: 12 additions & 4 deletions Source/Lib/Utils/FileIO/FileIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,27 @@ class filemap : public buffer_view
~filemap() { Close(); }

// Actions
int Open_ReadMode(const char* FileName);
int Open_ReadMode(const string& FileName) { return Open_ReadMode(FileName.c_str()); }
// How the content of a file is accessed.
// The numeric values are explicit because callers rely on value-initialization
// (style{}) selecting mmap, so mmap must stay the zero value.
enum class style
{
    mmap    = 0, // Memory mapping of the file (value-initialized default)
    fstream = 1, // Buffered reads through std::ifstream
    fopen   = 2, // Buffered reads through C stdio (fopen / fread)
    open    = 3, // Buffered reads through the native OS file API (NOTE(review): only the Windows path appears to be implemented, confirm for other platforms)
};
int Open_ReadMode(const char* FileName, style NewStyle = {}, size_t Begin = {}, size_t End = {});
int Open_ReadMode(const string& FileName, style NewStyle = {}, size_t Begin = {}, size_t End = {}) { return Open_ReadMode(FileName.c_str(), NewStyle, Begin, End); }
bool IsOpen() { return Private == (decltype(Private))-1 ? false : true; }
int Remap();
int Remap(size_t Begin = 0, size_t End = 0);
int Close();

private:
#if defined(_WIN32) || defined(_WINDOWS)
void* Private = (void*)-1;
void* Private2 = (void*)-1;
#else //defined(_WIN32) || defined(_WINDOWS)
int Private = (int)-1;
#endif //defined(_WIN32) || defined(_WINDOWS)
void* Private2 = (void*)-1;
style Style = {};
};

class file
Expand Down
1 change: 1 addition & 0 deletions Source/Lib/Utils/FileIO/Input_Base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ input_base::~input_base()
bool input_base::Parse(filemap* FileMap_Source, const buffer_view& Buffer_Source, size_t FileSize_Source)
{
FileMap = FileMap_Source;
FileMap2 = nullptr;
FileSize = FileSize_Source == (size_t)-1 ? Buffer_Source.Size() : FileSize_Source;
Buffer = Buffer_Source;
HashComputed = false;
Expand Down
Loading

0 comments on commit 7b285eb

Please sign in to comment.